; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone

define i16 @unpckbw_test(i16 %a0, i16 %a1) {
; X86-LABEL: unpckbw_test:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k0 ## encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    kunpckbw %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x4b,0xc1]
; X86-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: unpckbw_test:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k0 ## encoding: [0xc5,0xf8,0x92,0xc7]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    kunpckbw %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x4b,0xc1]
; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
  ret i16 %res
}
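
; The broadcast tests below share one pattern: each intrinsic is called in its
; unmasked (mask of -1), merge-masked, and zero-masked forms, and the three
; results are added together so that none of the calls is dead.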

define <16 x i32>@test_int_x86_avx512_mask_pbroadcastd_gpr_512(i32 %x0, <16 x i32> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastd {{[0-9]+}}(%esp), %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0x4c,0x24,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0xc1]
; X86-NEXT:    vmovdqa32 %zmm1, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xd1]
; X86-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastd %edi, %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastd %edi, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x7c,0xc7]
; X64-NEXT:    vpbroadcastd %edi, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x7c,0xd7]
; X64-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 %mask)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> zeroinitializer, i16 %mask)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res2, %res3
  ret <16 x i32> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32, <16 x i32>, i16)

define <8 x i64>@test_int_x86_avx512_mask_pbroadcastq_gpr_512(i64 %x0, <8 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastq_gpr_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastq {{[0-9]+}}(%esp), %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0x8c,0x24,0x04,0x00,0x00,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xc1]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0xd1]
; X86-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastq_gpr_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastq %rdi, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastq %rdi, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x7c,0xc7]
; X64-NEXT:    vpbroadcastq %rdi, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x7c,0xd7]
; X64-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1, i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1, i8 %mask)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> zeroinitializer, i8 %mask)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64, <8 x i64>, i8)

declare <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float>, <16 x float>, i16) nounwind readonly

define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0, <16 x float> %a1, i16 %mask ) {
; X86-LABEL: test_x86_vbroadcast_ss_ps_512:
; X86:       ## %bb.0:
; X86-NEXT:    vbroadcastss %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x18,0xd0]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x18,0xc8]
; X86-NEXT:    vaddps %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9]
; X86-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x18,0xc0]
; X86-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_vbroadcast_ss_ps_512:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x18,0xd0]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x18,0xc8]
; X64-NEXT:    vaddps %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9]
; X64-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x18,0xc0]
; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> zeroinitializer, i16 -1)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> %a1, i16 %mask)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> zeroinitializer, i16 %mask)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double>, <8 x double>, i8) nounwind readonly

define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0, <8 x double> %a1, i8 %mask ) {
; X86-LABEL: test_x86_vbroadcast_sd_pd_512:
; X86:       ## %bb.0:
; X86-NEXT:    vbroadcastsd %xmm0, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x19,0xc8]
; X86-NEXT:    vaddpd %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc9]
; X86-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x19,0xc0]
; X86-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_vbroadcast_sd_pd_512:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastsd %xmm0, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0xd0]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x19,0xc8]
; X64-NEXT:    vaddpd %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc9]
; X64-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x19,0xc0]
; X64-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> zeroinitializer, i8 -1)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> %a1, i8 %mask)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> zeroinitializer, i8 %mask)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res2, %res3
  ret <8 x double> %res4
}

declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_pbroadcastd_512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_pbroadcastd_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastd %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0xd0]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x58,0xc8]
; X86-NEXT:    vpbroadcastd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x58,0xc0]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pbroadcastd_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastd %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0xd0]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x58,0xc8]
; X64-NEXT:    vpbroadcastd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x58,0xc0]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask)
  %res2 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res2, %res3
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_pbroadcastq_512(<2 x i64> %x0, <8 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_pbroadcastq_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x59,0xc8]
; X86-NEXT:    vpbroadcastq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x59,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pbroadcastq_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd0]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x59,0xc8]
; X64-NEXT:    vpbroadcastq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x59,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1, i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1, i8 %mask)
  %res2 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> zeroinitializer, i8 %mask)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_movsldup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movsldup_512:
; X86:       ## %bb.0:
; X86-NEXT:    vmovsldup %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x7e,0x48,0x12,0xd0]
; X86-NEXT:    ## zmm2 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmovsldup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x12,0xc8]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X86-NEXT:    vaddps %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xca]
; X86-NEXT:    vmovsldup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x12,0xc0]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X86-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movsldup_512:
; X64:       ## %bb.0:
; X64-NEXT:    vmovsldup %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x7e,0x48,0x12,0xd0]
; X64-NEXT:    ## zmm2 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovsldup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x12,0xc8]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X64-NEXT:    vaddps %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xca]
; X64-NEXT:    vmovsldup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x12,0xc0]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_movshdup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movshdup_512:
; X86:       ## %bb.0:
; X86-NEXT:    vmovshdup %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x7e,0x48,0x16,0xd0]
; X86-NEXT:    ## zmm2 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmovshdup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x16,0xc8]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X86-NEXT:    vaddps %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xca]
; X86-NEXT:    vmovshdup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x16,0xc0]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X86-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movshdup_512:
; X64:       ## %bb.0:
; X64-NEXT:    vmovshdup %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x7e,0x48,0x16,0xd0]
; X64-NEXT:    ## zmm2 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovshdup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x16,0xc8]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X64-NEXT:    vaddps %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xca]
; X64-NEXT:    vmovshdup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x16,0xc0]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_movddup_512(<8 x double> %x0, <8 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movddup_512:
; X86:       ## %bb.0:
; X86-NEXT:    vmovddup %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xff,0x48,0x12,0xd0]
; X86-NEXT:    ## zmm2 = zmm0[0,0,2,2,4,4,6,6]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovddup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x49,0x12,0xc8]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
; X86-NEXT:    vaddpd %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xca]
; X86-NEXT:    vmovddup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xc9,0x12,0xc0]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; X86-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movddup_512:
; X64:       ## %bb.0:
; X64-NEXT:    vmovddup %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xff,0x48,0x12,0xd0]
; X64-NEXT:    ## zmm2 = zmm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovddup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x49,0x12,0xc8]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    vaddpd %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xca]
; X64-NEXT:    vmovddup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xc9,0x12,0xc0]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 %x2)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 -1)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> zeroinitializer, i8 %x2)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res2, %res3
  ret <8 x double> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double>, i32, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_perm_df_512(<8 x double> %x0, i32 %x1, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_perm_df_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpermpd $3, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x01,0xd0,0x03]
; X86-NEXT:    ## zmm2 = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermpd $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x01,0xc8,0x03]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    vpermpd $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x01,0xc0,0x03]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X86-NEXT:    vaddpd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_perm_df_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpermpd $3, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x01,0xd0,0x03]
; X64-NEXT:    ## zmm2 = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpermpd $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x01,0xc8,0x03]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    vpermpd $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x01,0xc0,0x03]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X64-NEXT:    vaddpd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> %x2, i8 %x3)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> zeroinitializer, i8 %x3)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> %x2, i8 -1)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_perm_di_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_perm_di_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpermq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x00,0xd0,0x03]
; X86-NEXT:    ## zmm2 = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x00,0xc8,0x03]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    vpermq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x00,0xc0,0x03]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_perm_di_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpermq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x00,0xd0,0x03]
; X64-NEXT:    ## zmm2 = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpermq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x00,0xc8,0x03]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    vpermq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x00,0xc0,0x03]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}
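
; Masked store tests: each function performs a masked store through %ptr and
; an unmasked store of the same value through %ptr2.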

define void @test_store1(<16 x float> %data, i8* %ptr, i8* %ptr2, i16 %mask) {
; X86-LABEL: test_store1:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovups %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x11,0x01]
; X86-NEXT:    vmovups %zmm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_store1:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovups %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x11,0x07]
; X64-NEXT:    vmovups %zmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
  call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr2, <16 x float> %data, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )

define void @test_store2(<8 x double> %data, i8* %ptr, i8* %ptr2, i8 %mask) {
; X86-LABEL: test_store2:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovupd %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x11,0x01]
; X86-NEXT:    vmovupd %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfd,0x48,0x11,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_store2:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovupd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x11,0x07]
; X64-NEXT:    vmovupd %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x48,0x11,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
  call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr2, <8 x double> %data, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)

define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i8* %ptr2, i16 %mask) {
; X86-LABEL: test_mask_store_aligned_ps:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovaps %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x29,0x01]
; X86-NEXT:    vmovaps %zmm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_store_aligned_ps:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovaps %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x29,0x07]
; X64-NEXT:    vmovaps %zmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
  call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr2, <16 x float> %data, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 )

define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8* %ptr2, i8 %mask) {
; X86-LABEL: test_mask_store_aligned_pd:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovapd %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x29,0x01]
; X86-NEXT:    vmovapd %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfd,0x48,0x29,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_store_aligned_pd:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovapd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x29,0x07]
; X64-NEXT:    vmovapd %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x48,0x29,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
  call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr2, <8 x double> %data, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)

define void@test_int_x86_avx512_mask_storeu_q_512(i8* %ptr1, i8* %ptr2, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqu64 %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x7f,0x01]
; X86-NEXT:    vmovdqu64 %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu64 %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x7f,0x07]
; X64-NEXT:    vmovdqu64 %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.q.512(i8* %ptr1, <8 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.q.512(i8* %ptr2, <8 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.q.512(i8*, <8 x i64>, i8)

define void@test_int_x86_avx512_mask_storeu_d_512(i8* %ptr1, i8* %ptr2, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqu32 %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x7f,0x01]
; X86-NEXT:    vmovdqu64 %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu32 %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x7f,0x07]
; X64-NEXT:    vmovdqu64 %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.d.512(i8* %ptr1, <16 x i32> %x1, i16 %x2)
  call void @llvm.x86.avx512.mask.storeu.d.512(i8* %ptr2, <16 x i32> %x1, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.d.512(i8*, <16 x i32>, i16)

define void@test_int_x86_avx512_mask_store_q_512(i8* %ptr1, i8* %ptr2, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqa64 %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x7f,0x01]
; X86-NEXT:    vmovdqa64 %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqa64 %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x7f,0x07]
; X64-NEXT:    vmovdqa64 %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.q.512(i8* %ptr1, <8 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.q.512(i8* %ptr2, <8 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.q.512(i8*, <8 x i64>, i8)

define void@test_int_x86_avx512_mask_store_d_512(i8* %ptr1, i8* %ptr2, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqa32 %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x7f,0x01]
; X86-NEXT:    vmovdqa64 %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqa32 %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x7f,0x07]
; X64-NEXT:    vmovdqa64 %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.d.512(i8* %ptr1, <16 x i32> %x1, i16 %x2)
  call void @llvm.x86.avx512.mask.store.d.512(i8* %ptr2, <16 x i32> %x1, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.d.512(i8*, <16 x i32>, i16)
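
; Masked load tests: an unmasked load first defines the pass-through value,
; then merge-masked and zero-masked loads from the same (or a second) pointer
; are summed so both masked forms are checked.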

define <16 x float> @test_mask_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
; X86-LABEL: test_mask_load_aligned_ps:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovaps (%eax), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x00]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovaps (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0x00]
; X86-NEXT:    vmovaps (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x08]
; X86-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_ps:
; X64:       ## %bb.0:
; X64-NEXT:    vmovaps (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovaps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0x07]
; X64-NEXT:    vmovaps (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x0f]
; X64-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> %res, i16 %mask)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
  %res4 = fadd <16 x float> %res2, %res1
  ret <16 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)

define <16 x float> @test_mask_load_unaligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
; X86-LABEL: test_mask_load_unaligned_ps:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovups (%eax), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x00]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovups (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x10,0x00]
; X86-NEXT:    vmovups (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x10,0x08]
; X86-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_ps:
; X64:       ## %bb.0:
; X64-NEXT:    vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovups (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x10,0x07]
; X64-NEXT:    vmovups (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x10,0x0f]
; X64-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> %res, i16 %mask)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
  %res4 = fadd <16 x float> %res2, %res1
  ret <16 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8*, <16 x float>, i16)

define <8 x double> @test_mask_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_pd:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovapd (%eax), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovapd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x28,0x00]
; X86-NEXT:    vmovapd (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x28,0x08]
; X86-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_pd:
; X64:       ## %bb.0:
; X64-NEXT:    vmovapd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovapd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x28,0x07]
; X64-NEXT:    vmovapd (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x28,0x0f]
; X64-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> %res, i8 %mask)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x double> %res2, %res1
  ret <8 x double> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)

define <8 x double> @test_mask_load_unaligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_pd:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovupd (%eax), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x10,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovupd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x10,0x00]
; X86-NEXT:    vmovupd (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x10,0x08]
; X86-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_pd:
; X64:       ## %bb.0:
; X64-NEXT:    vmovupd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x10,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovupd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x10,0x07]
; X64-NEXT:    vmovupd (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x10,0x0f]
; X64-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> %res, i8 %mask)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x double> %res2, %res1
  ret <8 x double> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8*, <8 x double>, i8)

declare <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8*, <16 x i32>, i16)

define <16 x i32> @test_mask_load_unaligned_d(i8* %ptr, i8* %ptr2, <16 x i32> %data, i16 %mask) {
; X86-LABEL: test_mask_load_unaligned_d:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqu64 (%ecx), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqu32 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x6f,0x00]
; X86-NEXT:    vmovdqu32 (%ecx), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x6f,0x09]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_d:
; X64:       ## %bb.0:
; X64-NEXT:    vmovdqu64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu32 (%rsi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x6f,0x06]
; X64-NEXT:    vmovdqu32 (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x6f,0x0f]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8* %ptr, <16 x i32> zeroinitializer, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8* %ptr2, <16 x i32> %res, i16 %mask)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8* %ptr, <16 x i32> zeroinitializer, i16 %mask)
  %res4 = add <16 x i32> %res2, %res1
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(i8*, <8 x i64>, i8)

define <8 x i64> @test_mask_load_unaligned_q(i8* %ptr, i8* %ptr2, <8 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_q:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqu64 (%ecx), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqu64 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x6f,0x00]
; X86-NEXT:    vmovdqu64 (%ecx), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xc9,0x6f,0x09]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_q:
; X64:       ## %bb.0:
; X64-NEXT:    vmovdqu64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu64 (%rsi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x6f,0x06]
; X64-NEXT:    vmovdqu64 (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xc9,0x6f,0x0f]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(i8* %ptr, <8 x i64> zeroinitializer, i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(i8* %ptr2, <8 x i64> %res, i8 %mask)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(i8* %ptr, <8 x i64> zeroinitializer, i8 %mask)
  %res4 = add <8 x i64> %res2, %res1
  ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8*, <16 x i32>, i16)

define <16 x i32> @test_mask_load_aligned_d(<16 x i32> %data, i8* %ptr, i16 %mask) {
; X86-LABEL: test_mask_load_aligned_d:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovdqa64 (%eax), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x00]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa32 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0x00]
; X86-NEXT:    vmovdqa32 (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0x08]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_d:
; X64:       ## %bb.0:
; X64-NEXT:    vmovdqa64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovdqa32 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0x07]
; X64-NEXT:    vmovdqa32 (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0x0f]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8* %ptr, <16 x i32> zeroinitializer, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8* %ptr, <16 x i32> %res, i16 %mask)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8* %ptr, <16 x i32> zeroinitializer, i16 %mask)
  %res4 = add <16 x i32> %res2, %res1
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.load.q.512(i8*, <8 x i64>, i8)

define <8 x i64> @test_mask_load_aligned_q(<8 x i64> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_q:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovdqa64 (%eax), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovdqa64 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6f,0x00]
; X86-NEXT:    vmovdqa64 (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0x08]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_q:
; X64:       ## %bb.0:
; X64-NEXT:    vmovdqa64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovdqa64 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6f,0x07]
; X64-NEXT:    vmovdqa64 (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0x0f]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.load.q.512(i8* %ptr, <8 x i64> zeroinitializer, i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.load.q.512(i8* %ptr, <8 x i64> %res, i8 %mask)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.load.q.512(i8* %ptr, <8 x i64> zeroinitializer, i8 %mask)
  %res4 = add <8 x i64> %res2, %res1
  ret <8 x i64> %res4
}
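
; Immediate shuffle/permute tests (vpermilpd, vpermilps, vpshufd) using the
; same unmasked/merge-masked/zero-masked pattern as the broadcast tests above.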

declare <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double>, i32, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpermilpd $22, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x05,0xd0,0x16]
; X86-NEXT:    ## zmm2 = zmm0[0,1,3,2,5,4,6,6]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilpd $22, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x05,0xc8,0x16]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[0,1,3,2,5,4,6,6]
; X86-NEXT:    vpermilpd $22, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x05,0xc0,0x16]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,1,3,2,5,4,6,6]
; X86-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X86-NEXT:    vaddpd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpermilpd $22, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x05,0xd0,0x16]
; X64-NEXT:    ## zmm2 = zmm0[0,1,3,2,5,4,6,6]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilpd $22, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x05,0xc8,0x16]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[0,1,3,2,5,4,6,6]
; X64-NEXT:    vpermilpd $22, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x05,0xc0,0x16]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,1,3,2,5,4,6,6]
; X64-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X64-NEXT:    vaddpd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpermilps $22, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0x7d,0x48,0x04,0xd0,0x16]
; X86-NEXT:    ## zmm2 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermilps $22, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x04,0xc8,0x16]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; X86-NEXT:    vpermilps $22, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x04,0xc0,0x16]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; X86-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X86-NEXT:    vaddps %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpermilps $22, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0x7d,0x48,0x04,0xd0,0x16]
; X64-NEXT:    ## zmm2 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilps $22, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x04,0xc8,0x16]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; X64-NEXT:    vpermilps $22, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x04,0xc0,0x16]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; X64-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X64-NEXT:    vaddps %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res3, %res2
  ret <16 x float> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_pshuf_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshuf_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpshufd $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x7d,0x48,0x70,0xd0,0x03]
; X86-NEXT:    ## zmm2 = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpshufd $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x70,0xc8,0x03]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
; X86-NEXT:    vpshufd $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x70,0xc0,0x03]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
; X86-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pshuf_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpshufd $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x7d,0x48,0x70,0xd0,0x03]
; X64-NEXT:    ## zmm2 = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpshufd $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x70,0xc8,0x03]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
; X64-NEXT:    vpshufd $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x70,0xc0,0x03]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
; X64-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i32 3, <16 x i32> zeroinitializer, i16 %x3)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}
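
; Integer compare tests: the compare writes a mask register, which is then
; moved to a GPR; in the masked variants the incoming mask is applied with an
; AND in the GPR domain.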

define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
  ret i16 %res
}
952 define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
953 ; X86-LABEL: test_mask_pcmpeq_d:
955 ; X86-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc1]
956 ; X86-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
957 ; X86-NEXT: andw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x23,0x44,0x24,0x04]
958 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
959 ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
960 ; X86-NEXT: retl ## encoding: [0xc3]
962 ; X64-LABEL: test_mask_pcmpeq_d:
964 ; X64-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc1]
965 ; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
966 ; X64-NEXT: andl %edi, %eax ## encoding: [0x21,0xf8]
967 ; X64-NEXT: ## kill: def $ax killed $ax killed $eax
968 ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
969 ; X64-NEXT: retq ## encoding: [0xc3]
970 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
971 ret i16 %res
972 }
974 declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)
976 define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
977 ; CHECK-LABEL: test_pcmpeq_q:
979 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1]
980 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
981 ; CHECK-NEXT: ## kill: def $al killed $al killed $eax
982 ; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
983 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
984 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
985 ret i8 %res
986 }
988 define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
989 ; X86-LABEL: test_mask_pcmpeq_q:
991 ; X86-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1]
992 ; X86-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
993 ; X86-NEXT: andb {{[0-9]+}}(%esp), %al ## encoding: [0x22,0x44,0x24,0x04]
994 ; X86-NEXT: ## kill: def $al killed $al killed $eax
995 ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
996 ; X86-NEXT: retl ## encoding: [0xc3]
998 ; X64-LABEL: test_mask_pcmpeq_q:
1000 ; X64-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1]
1001 ; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
1002 ; X64-NEXT: andb %dil, %al ## encoding: [0x40,0x20,0xf8]
1003 ; X64-NEXT: ## kill: def $al killed $al killed $eax
1004 ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1005 ; X64-NEXT: retq ## encoding: [0xc3]
1006 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
1007 ret i8 %res
1008 }
1010 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)
1012 define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
1013 ; CHECK-LABEL: test_pcmpgt_d:
1015 ; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1]
1016 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
1017 ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
1018 ; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1019 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1020 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
1021 ret i16 %res
1022 }
1024 define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
1025 ; X86-LABEL: test_mask_pcmpgt_d:
1027 ; X86-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1]
1028 ; X86-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
1029 ; X86-NEXT: andw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x23,0x44,0x24,0x04]
1030 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
1031 ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1032 ; X86-NEXT: retl ## encoding: [0xc3]
1034 ; X64-LABEL: test_mask_pcmpgt_d:
1036 ; X64-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1]
1037 ; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
1038 ; X64-NEXT: andl %edi, %eax ## encoding: [0x21,0xf8]
1039 ; X64-NEXT: ## kill: def $ax killed $ax killed $eax
1040 ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1041 ; X64-NEXT: retq ## encoding: [0xc3]
1042 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
1043 ret i16 %res
1044 }
1046 declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)
1048 define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
1049 ; CHECK-LABEL: test_pcmpgt_q:
1051 ; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xc1]
1052 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
1053 ; CHECK-NEXT: ## kill: def $al killed $al killed $eax
1054 ; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1055 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1056 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
1057 ret i8 %res
1058 }
1060 define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
1061 ; X86-LABEL: test_mask_pcmpgt_q:
1063 ; X86-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xc1]
1064 ; X86-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
1065 ; X86-NEXT: andb {{[0-9]+}}(%esp), %al ## encoding: [0x22,0x44,0x24,0x04]
1066 ; X86-NEXT: ## kill: def $al killed $al killed $eax
1067 ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1068 ; X86-NEXT: retl ## encoding: [0xc3]
1070 ; X64-LABEL: test_mask_pcmpgt_q:
1072 ; X64-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xc1]
1073 ; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
1074 ; X64-NEXT: andb %dil, %al ## encoding: [0x40,0x20,0xf8]
1075 ; X64-NEXT: ## kill: def $al killed $al killed $eax
1076 ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1077 ; X64-NEXT: retq ## encoding: [0xc3]
1078 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
1079 ret i8 %res
1080 }
1082 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)
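; The floating-point unpack tests check merge-masked against unmasked results; the shuffle comment emitted after each vunpck instruction spells out the element interleaving.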
1084 declare <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)
1086 define <8 x double> @test_int_x86_avx512_mask_unpckh_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
1087 ; X86-LABEL: test_int_x86_avx512_mask_unpckh_pd_512:
1089 ; X86-NEXT: vunpckhpd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x15,0xd9]
1090 ; X86-NEXT: ## zmm3 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
1091 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
1092 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
1093 ; X86-NEXT: vunpckhpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x15,0xd1]
1094 ; X86-NEXT: ## zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
1095 ; X86-NEXT: vaddpd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc3]
1096 ; X86-NEXT: retl ## encoding: [0xc3]
1098 ; X64-LABEL: test_int_x86_avx512_mask_unpckh_pd_512:
1100 ; X64-NEXT: vunpckhpd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x15,0xd9]
1101 ; X64-NEXT: ## zmm3 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
1102 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1103 ; X64-NEXT: vunpckhpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x15,0xd1]
1104 ; X64-NEXT: ## zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
1105 ; X64-NEXT: vaddpd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc3]
1106 ; X64-NEXT: retq ## encoding: [0xc3]
1107 %res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
1108 %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
1109 %res2 = fadd <8 x double> %res, %res1
1110 ret <8 x double> %res2
1111 }
1113 declare <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
1115 define <16 x float> @test_int_x86_avx512_mask_unpckh_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
1116 ; X86-LABEL: test_int_x86_avx512_mask_unpckh_ps_512:
1118 ; X86-NEXT: vunpckhps %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x15,0xd9]
1119 ; X86-NEXT: ## zmm3 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1120 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1121 ; X86-NEXT: vunpckhps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x15,0xd1]
1122 ; X86-NEXT: ## zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1123 ; X86-NEXT: vaddps %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc3]
1124 ; X86-NEXT: retl ## encoding: [0xc3]
1126 ; X64-LABEL: test_int_x86_avx512_mask_unpckh_ps_512:
1128 ; X64-NEXT: vunpckhps %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x15,0xd9]
1129 ; X64-NEXT: ## zmm3 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1130 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1131 ; X64-NEXT: vunpckhps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x15,0xd1]
1132 ; X64-NEXT: ## zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1133 ; X64-NEXT: vaddps %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc3]
1134 ; X64-NEXT: retq ## encoding: [0xc3]
1135 %res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
1136 %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
1137 %res2 = fadd <16 x float> %res, %res1
1138 ret <16 x float> %res2
1139 }
1141 declare <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)
1143 define <8 x double> @test_int_x86_avx512_mask_unpckl_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
1144 ; X86-LABEL: test_int_x86_avx512_mask_unpckl_pd_512:
1146 ; X86-NEXT: vunpcklpd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x14,0xd9]
1147 ; X86-NEXT: ## zmm3 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1148 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
1149 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
1150 ; X86-NEXT: vunpcklpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x14,0xd1]
1151 ; X86-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1152 ; X86-NEXT: vaddpd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc3]
1153 ; X86-NEXT: retl ## encoding: [0xc3]
1155 ; X64-LABEL: test_int_x86_avx512_mask_unpckl_pd_512:
1157 ; X64-NEXT: vunpcklpd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x14,0xd9]
1158 ; X64-NEXT: ## zmm3 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1159 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1160 ; X64-NEXT: vunpcklpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x14,0xd1]
1161 ; X64-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1162 ; X64-NEXT: vaddpd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc3]
1163 ; X64-NEXT: retq ## encoding: [0xc3]
1164 %res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
1165 %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
1166 %res2 = fadd <8 x double> %res, %res1
1167 ret <8 x double> %res2
1168 }
1170 declare <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
1172 define <16 x float> @test_int_x86_avx512_mask_unpckl_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
1173 ; X86-LABEL: test_int_x86_avx512_mask_unpckl_ps_512:
1175 ; X86-NEXT: vunpcklps %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x14,0xd9]
1176 ; X86-NEXT: ## zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
1177 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1178 ; X86-NEXT: vunpcklps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x14,0xd1]
1179 ; X86-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
1180 ; X86-NEXT: vaddps %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc3]
1181 ; X86-NEXT: retl ## encoding: [0xc3]
1183 ; X64-LABEL: test_int_x86_avx512_mask_unpckl_ps_512:
1185 ; X64-NEXT: vunpcklps %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x14,0xd9]
1186 ; X64-NEXT: ## zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
1187 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1188 ; X64-NEXT: vunpcklps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x14,0xd1]
1189 ; X64-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
1190 ; X64-NEXT: vaddps %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc3]
1191 ; X64-NEXT: retq ## encoding: [0xc3]
1192 %res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
1193 %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
1194 %res2 = fadd <16 x float> %res, %res1
1195 ret <16 x float> %res2
1196 }
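; The integer unpacks follow the same pattern; punpcklqd additionally covers the zero-masked form, so its three results are summed.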
1198 declare <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
1200 define <8 x i64> @test_int_x86_avx512_mask_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
1201 ; X86-LABEL: test_int_x86_avx512_mask_punpcklqd_q_512:
1203 ; X86-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6c,0xd9]
1204 ; X86-NEXT: ## zmm3 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1205 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
1206 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
1207 ; X86-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6c,0xd1]
1208 ; X86-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1209 ; X86-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6c,0xc1]
1210 ; X86-NEXT: ## zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1211 ; X86-NEXT: vpaddq %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
1212 ; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
1213 ; X86-NEXT: retl ## encoding: [0xc3]
1215 ; X64-LABEL: test_int_x86_avx512_mask_punpcklqd_q_512:
1217 ; X64-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6c,0xd9]
1218 ; X64-NEXT: ## zmm3 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1219 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1220 ; X64-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6c,0xd1]
1221 ; X64-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1222 ; X64-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6c,0xc1]
1223 ; X64-NEXT: ## zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1224 ; X64-NEXT: vpaddq %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
1225 ; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
1226 ; X64-NEXT: retq ## encoding: [0xc3]
1227 %res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
1228 %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
1229 %res2 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
1230 %res3 = add <8 x i64> %res, %res1
1231 %res4 = add <8 x i64> %res2, %res3
1232 ret <8 x i64> %res4
1233 }
1235 declare <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
1237 define <8 x i64> @test_int_x86_avx512_mask_punpckhqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
1238 ; X86-LABEL: test_int_x86_avx512_mask_punpckhqd_q_512:
1240 ; X86-NEXT: vpunpckhqdq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6d,0xd9]
1241 ; X86-NEXT: ## zmm3 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
1242 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
1243 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
1244 ; X86-NEXT: vpunpckhqdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6d,0xd1]
1245 ; X86-NEXT: ## zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
1246 ; X86-NEXT: vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
1247 ; X86-NEXT: retl ## encoding: [0xc3]
1249 ; X64-LABEL: test_int_x86_avx512_mask_punpckhqd_q_512:
1251 ; X64-NEXT: vpunpckhqdq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6d,0xd9]
1252 ; X64-NEXT: ## zmm3 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
1253 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1254 ; X64-NEXT: vpunpckhqdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6d,0xd1]
1255 ; X64-NEXT: ## zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
1256 ; X64-NEXT: vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
1257 ; X64-NEXT: retq ## encoding: [0xc3]
1258 %res = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
1259 %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
1260 %res2 = add <8 x i64> %res, %res1
1261 ret <8 x i64> %res2
1262 }
1264 declare <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1266 define <16 x i32> @test_int_x86_avx512_mask_punpckhd_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
1267 ; X86-LABEL: test_int_x86_avx512_mask_punpckhd_q_512:
1269 ; X86-NEXT: vpunpckhdq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7d,0x48,0x6a,0xd9]
1270 ; X86-NEXT: ## zmm3 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1271 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1272 ; X86-NEXT: vpunpckhdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6a,0xd1]
1273 ; X86-NEXT: ## zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1274 ; X86-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
1275 ; X86-NEXT: retl ## encoding: [0xc3]
1277 ; X64-LABEL: test_int_x86_avx512_mask_punpckhd_q_512:
1279 ; X64-NEXT: vpunpckhdq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7d,0x48,0x6a,0xd9]
1280 ; X64-NEXT: ## zmm3 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1281 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1282 ; X64-NEXT: vpunpckhdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6a,0xd1]
1283 ; X64-NEXT: ## zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1284 ; X64-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
1285 ; X64-NEXT: retq ## encoding: [0xc3]
1286 %res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
1287 %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
1288 %res2 = add <16 x i32> %res, %res1
1289 ret <16 x i32> %res2
1290 }
1292 declare <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1294 define <16 x i32> @test_int_x86_avx512_mask_punpckld_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
1295 ; X86-LABEL: test_int_x86_avx512_mask_punpckld_q_512:
1297 ; X86-NEXT: vpunpckldq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7d,0x48,0x62,0xd9]
1298 ; X86-NEXT: ## zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
1299 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1300 ; X86-NEXT: vpunpckldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x62,0xd1]
1301 ; X86-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
1302 ; X86-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
1303 ; X86-NEXT: retl ## encoding: [0xc3]
1305 ; X64-LABEL: test_int_x86_avx512_mask_punpckld_q_512:
1307 ; X64-NEXT: vpunpckldq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7d,0x48,0x62,0xd9]
1308 ; X64-NEXT: ## zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
1309 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1310 ; X64-NEXT: vpunpckldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x62,0xd1]
1311 ; X64-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
1312 ; X64-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
1313 ; X64-NEXT: retq ## encoding: [0xc3]
1314 %res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
1315 %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
1316 %res2 = add <16 x i32> %res, %res1
1317 ret <16 x i32> %res2
1318 }
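; Shift-by-immediate tests: pslli/psrli/psrai are each checked in d (32-bit) and q (64-bit) widths with an unmasked, a merge-masked, and a zero-masked variant, all shifting by 7.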
1320 define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
1321 ; CHECK-LABEL: test_x86_avx512_pslli_d:
1323 ; CHECK-NEXT: vpslld $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xf0,0x07]
1324 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1325 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1326 ret <16 x i32> %res
1327 }
1329 define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1330 ; X86-LABEL: test_x86_avx512_mask_pslli_d:
1332 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1333 ; X86-NEXT: vpslld $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xf0,0x07]
1334 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1335 ; X86-NEXT: retl ## encoding: [0xc3]
1337 ; X64-LABEL: test_x86_avx512_mask_pslli_d:
1339 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1340 ; X64-NEXT: vpslld $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xf0,0x07]
1341 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1342 ; X64-NEXT: retq ## encoding: [0xc3]
1343 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1344 ret <16 x i32> %res
1345 }
1347 define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
1348 ; X86-LABEL: test_x86_avx512_maskz_pslli_d:
1350 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1351 ; X86-NEXT: vpslld $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xf0,0x07]
1352 ; X86-NEXT: retl ## encoding: [0xc3]
1354 ; X64-LABEL: test_x86_avx512_maskz_pslli_d:
1356 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1357 ; X64-NEXT: vpslld $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xf0,0x07]
1358 ; X64-NEXT: retq ## encoding: [0xc3]
1359 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1360 ret <16 x i32> %res
1361 }
1363 declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1365 define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
1366 ; CHECK-LABEL: test_x86_avx512_pslli_q:
1368 ; CHECK-NEXT: vpsllq $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x73,0xf0,0x07]
1369 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1370 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1371 ret <8 x i64> %res
1372 }
1374 define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1375 ; X86-LABEL: test_x86_avx512_mask_pslli_q:
1377 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
1378 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
1379 ; X86-NEXT: vpsllq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xf0,0x07]
1380 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1381 ; X86-NEXT: retl ## encoding: [0xc3]
1383 ; X64-LABEL: test_x86_avx512_mask_pslli_q:
1385 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1386 ; X64-NEXT: vpsllq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xf0,0x07]
1387 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1388 ; X64-NEXT: retq ## encoding: [0xc3]
1389 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1390 ret <8 x i64> %res
1391 }
1393 define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
1394 ; X86-LABEL: test_x86_avx512_maskz_pslli_q:
1396 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
1397 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
1398 ; X86-NEXT: vpsllq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xf0,0x07]
1399 ; X86-NEXT: retl ## encoding: [0xc3]
1401 ; X64-LABEL: test_x86_avx512_maskz_pslli_q:
1403 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1404 ; X64-NEXT: vpsllq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xf0,0x07]
1405 ; X64-NEXT: retq ## encoding: [0xc3]
1406 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1407 ret <8 x i64> %res
1408 }
1410 declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1412 define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
1413 ; CHECK-LABEL: test_x86_avx512_psrli_d:
1415 ; CHECK-NEXT: vpsrld $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xd0,0x07]
1416 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1417 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1418 ret <16 x i32> %res
1419 }
1421 define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1422 ; X86-LABEL: test_x86_avx512_mask_psrli_d:
1424 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1425 ; X86-NEXT: vpsrld $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0x07]
1426 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1427 ; X86-NEXT: retl ## encoding: [0xc3]
1429 ; X64-LABEL: test_x86_avx512_mask_psrli_d:
1431 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1432 ; X64-NEXT: vpsrld $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0x07]
1433 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1434 ; X64-NEXT: retq ## encoding: [0xc3]
1435 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1436 ret <16 x i32> %res
1437 }
1439 define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
1440 ; X86-LABEL: test_x86_avx512_maskz_psrli_d:
1442 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1443 ; X86-NEXT: vpsrld $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xd0,0x07]
1444 ; X86-NEXT: retl ## encoding: [0xc3]
1446 ; X64-LABEL: test_x86_avx512_maskz_psrli_d:
1448 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1449 ; X64-NEXT: vpsrld $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xd0,0x07]
1450 ; X64-NEXT: retq ## encoding: [0xc3]
1451 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1452 ret <16 x i32> %res
1453 }
1455 declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1457 define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
1458 ; CHECK-LABEL: test_x86_avx512_psrli_q:
1460 ; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x73,0xd0,0x07]
1461 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1462 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1463 ret <8 x i64> %res
1464 }
1466 define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1467 ; X86-LABEL: test_x86_avx512_mask_psrli_q:
1469 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
1470 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
1471 ; X86-NEXT: vpsrlq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xd0,0x07]
1472 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1473 ; X86-NEXT: retl ## encoding: [0xc3]
1475 ; X64-LABEL: test_x86_avx512_mask_psrli_q:
1477 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1478 ; X64-NEXT: vpsrlq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xd0,0x07]
1479 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1480 ; X64-NEXT: retq ## encoding: [0xc3]
1481 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1482 ret <8 x i64> %res
1483 }
1485 define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
1486 ; X86-LABEL: test_x86_avx512_maskz_psrli_q:
1488 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
1489 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
1490 ; X86-NEXT: vpsrlq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xd0,0x07]
1491 ; X86-NEXT: retl ## encoding: [0xc3]
1493 ; X64-LABEL: test_x86_avx512_maskz_psrli_q:
1495 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1496 ; X64-NEXT: vpsrlq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xd0,0x07]
1497 ; X64-NEXT: retq ## encoding: [0xc3]
1498 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1499 ret <8 x i64> %res
1500 }
1502 declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1504 define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
1505 ; CHECK-LABEL: test_x86_avx512_psrai_d:
1507 ; CHECK-NEXT: vpsrad $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xe0,0x07]
1508 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1509 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1510 ret <16 x i32> %res
1511 }
1513 define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1514 ; X86-LABEL: test_x86_avx512_mask_psrai_d:
1516 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1517 ; X86-NEXT: vpsrad $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xe0,0x07]
1518 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1519 ; X86-NEXT: retl ## encoding: [0xc3]
1521 ; X64-LABEL: test_x86_avx512_mask_psrai_d:
1523 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1524 ; X64-NEXT: vpsrad $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xe0,0x07]
1525 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1526 ; X64-NEXT: retq ## encoding: [0xc3]
1527 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1528 ret <16 x i32> %res
1529 }
1531 define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
1532 ; X86-LABEL: test_x86_avx512_maskz_psrai_d:
1534 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1535 ; X86-NEXT: vpsrad $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xe0,0x07]
1536 ; X86-NEXT: retl ## encoding: [0xc3]
1538 ; X64-LABEL: test_x86_avx512_maskz_psrai_d:
1540 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1541 ; X64-NEXT: vpsrad $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xe0,0x07]
1542 ; X64-NEXT: retq ## encoding: [0xc3]
1543 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1544 ret <16 x i32> %res
1545 }
1547 declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1549 define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
1550 ; CHECK-LABEL: test_x86_avx512_psrai_q:
1552 ; CHECK-NEXT: vpsraq $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xe0,0x07]
1553 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1554 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1555 ret <8 x i64> %res
1556 }
1558 define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1559 ; X86-LABEL: test_x86_avx512_mask_psrai_q:
1561 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
1562 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
1563 ; X86-NEXT: vpsraq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xe0,0x07]
1564 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1565 ; X86-NEXT: retl ## encoding: [0xc3]
1567 ; X64-LABEL: test_x86_avx512_mask_psrai_q:
1569 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1570 ; X64-NEXT: vpsraq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xe0,0x07]
1571 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1572 ; X64-NEXT: retq ## encoding: [0xc3]
1573 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1574 ret <8 x i64> %res
1575 }
1577 define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
1578 ; X86-LABEL: test_x86_avx512_maskz_psrai_q:
1580 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
1581 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
1582 ; X86-NEXT: vpsraq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xe0,0x07]
1583 ; X86-NEXT: retl ## encoding: [0xc3]
1585 ; X64-LABEL: test_x86_avx512_maskz_psrai_q:
1587 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1588 ; X64-NEXT: vpsraq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xe0,0x07]
1589 ; X64-NEXT: retq ## encoding: [0xc3]
1590 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1591 ret <8 x i64> %res
1592 }
1594 declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
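; All three non-temporal store intrinsics lower to vmovntps here, independent of the element type of the stored vector.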
1596 declare void @llvm.x86.avx512.storent.q.512(i8*, <8 x i64>)
1598 define void @test_storent_q_512(<8 x i64> %data, i8* %ptr) {
1599 ; X86-LABEL: test_storent_q_512:
1601 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
1602 ; X86-NEXT: vmovntps %zmm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x00]
1603 ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1604 ; X86-NEXT: retl ## encoding: [0xc3]
1606 ; X64-LABEL: test_storent_q_512:
1608 ; X64-NEXT: vmovntps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x07]
1609 ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1610 ; X64-NEXT: retq ## encoding: [0xc3]
1611 call void @llvm.x86.avx512.storent.q.512(i8* %ptr, <8 x i64> %data)
1612 ret void
1613 }
1615 declare void @llvm.x86.avx512.storent.pd.512(i8*, <8 x double>)
1617 define void @test_storent_pd_512(<8 x double> %data, i8* %ptr) {
1618 ; X86-LABEL: test_storent_pd_512:
1620 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
1621 ; X86-NEXT: vmovntps %zmm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x00]
1622 ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1623 ; X86-NEXT: retl ## encoding: [0xc3]
1625 ; X64-LABEL: test_storent_pd_512:
1627 ; X64-NEXT: vmovntps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x07]
1628 ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1629 ; X64-NEXT: retq ## encoding: [0xc3]
1630 call void @llvm.x86.avx512.storent.pd.512(i8* %ptr, <8 x double> %data)
1631 ret void
1632 }
1634 declare void @llvm.x86.avx512.storent.ps.512(i8*, <16 x float>)
1636 define void @test_storent_ps_512(<16 x float> %data, i8* %ptr) {
1637 ; X86-LABEL: test_storent_ps_512:
1639 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
1640 ; X86-NEXT: vmovntps %zmm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x00]
1641 ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1642 ; X86-NEXT: retl ## encoding: [0xc3]
1644 ; X64-LABEL: test_storent_ps_512:
1646 ; X64-NEXT: vmovntps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x07]
1647 ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1648 ; X64-NEXT: retq ## encoding: [0xc3]
1649 call void @llvm.x86.avx512.storent.ps.512(i8* %ptr, <16 x float> %data)
1650 ret void
1651 }
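; Bitwise logic intrinsics: the unmasked forms drop the zeroinitializer pass-through entirely, while the masked forms write into the pass-through register under %k1 and then copy it to zmm0.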
1653 define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
1654 ; CHECK-LABEL: test_xor_epi32:
1656 ; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xef,0xc1]
1657 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1658 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1659 ret <16 x i32> %res
1660 }
1662 define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1663 ; X86-LABEL: test_mask_xor_epi32:
1665 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1666 ; X86-NEXT: vpxord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xef,0xd1]
1667 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1668 ; X86-NEXT: retl ## encoding: [0xc3]
1670 ; X64-LABEL: test_mask_xor_epi32:
1672 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1673 ; X64-NEXT: vpxord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xef,0xd1]
1674 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1675 ; X64-NEXT: retq ## encoding: [0xc3]
1676 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1677 ret <16 x i32> %res
1678 }
1680 declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1682 define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
1683 ; CHECK-LABEL: test_or_epi32:
1685 ; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xeb,0xc1]
1686 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1687 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1688 ret <16 x i32> %res
1689 }
1691 define <16 x i32> @test_mask_or_epi32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1692 ; X86-LABEL: test_mask_or_epi32:
1694 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1695 ; X86-NEXT: vpord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xeb,0xd1]
1696 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1697 ; X86-NEXT: retl ## encoding: [0xc3]
1699 ; X64-LABEL: test_mask_or_epi32:
1701 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1702 ; X64-NEXT: vpord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xeb,0xd1]
1703 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1704 ; X64-NEXT: retq ## encoding: [0xc3]
1705 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1706 ret <16 x i32> %res
1707 }
1709 declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1711 define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
1712 ; CHECK-LABEL: test_and_epi32:
1714 ; CHECK-NEXT: vpandd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xdb,0xc1]
1715 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1716 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1717 ret <16 x i32> %res
1718 }
1720 define <16 x i32> @test_mask_and_epi32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1721 ; X86-LABEL: test_mask_and_epi32:
1723 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1724 ; X86-NEXT: vpandd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xdb,0xd1]
1725 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1726 ; X86-NEXT: retl ## encoding: [0xc3]
1728 ; X64-LABEL: test_mask_and_epi32:
1730 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1731 ; X64-NEXT: vpandd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xdb,0xd1]
1732 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1733 ; X64-NEXT: retq ## encoding: [0xc3]
1734 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1735 ret <16 x i32> %res
1736 }
1738 declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1740 define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
1741 ; CHECK-LABEL: test_xor_epi64:
1743 ; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xef,0xc1]
1744 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1745 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1746 ret <8 x i64> %res
1747 }
1749 define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1750 ; X86-LABEL: test_mask_xor_epi64:
1752 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
1753 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
1754 ; X86-NEXT: vpxorq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xef,0xd1]
1755 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1756 ; X86-NEXT: retl ## encoding: [0xc3]
1758 ; X64-LABEL: test_mask_xor_epi64:
1760 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1761 ; X64-NEXT: vpxorq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xef,0xd1]
1762 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1763 ; X64-NEXT: retq ## encoding: [0xc3]
1764 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1765 ret <8 x i64> %res
1766 }
1768 declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
1770 define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
1771 ; CHECK-LABEL: test_or_epi64:
1773 ; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xeb,0xc1]
1774 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1775 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1776 ret <8 x i64> %res
1777 }
1779 define <8 x i64> @test_mask_or_epi64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1780 ; X86-LABEL: test_mask_or_epi64:
1782 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
1783 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
1784 ; X86-NEXT: vporq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xeb,0xd1]
1785 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1786 ; X86-NEXT: retl ## encoding: [0xc3]
1788 ; X64-LABEL: test_mask_or_epi64:
1790 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1791 ; X64-NEXT: vporq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xeb,0xd1]
1792 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1793 ; X64-NEXT: retq ## encoding: [0xc3]
1794 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1795 ret <8 x i64> %res
1796 }
1798 declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
1800 define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
1801 ; CHECK-LABEL: test_and_epi64:
1803 ; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xdb,0xc1]
1804 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1805 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1806 ret <8 x i64> %res
1807 }
1809 define <8 x i64> @test_mask_and_epi64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1810 ; X86-LABEL: test_mask_and_epi64:
1812 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
1813 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
1814 ; X86-NEXT: vpandq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xdb,0xd1]
1815 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1816 ; X86-NEXT: retl ## encoding: [0xc3]
1818 ; X64-LABEL: test_mask_and_epi64:
1820 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1821 ; X64-NEXT: vpandq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xdb,0xd1]
1822 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1823 ; X64-NEXT: retq ## encoding: [0xc3]
1824 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1825 ret <8 x i64> %res
1826 }
1828 declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
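; The add/sub tests use the usual operand-form suffixes: rr = reg-reg, rm = reg-mem, rmb = reg-broadcast-mem, with k/kz marking merge- and zero-masked variants.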
1830 define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
1831 ; CHECK-LABEL: test_mask_add_epi32_rr:
1833 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
1834 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1835 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1836 ret <16 x i32> %res
1837 }
1839 define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1840 ; X86-LABEL: test_mask_add_epi32_rrk:
1842 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1843 ; X86-NEXT: vpaddd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xd1]
1844 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1845 ; X86-NEXT: retl ## encoding: [0xc3]
1847 ; X64-LABEL: test_mask_add_epi32_rrk:
1849 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1850 ; X64-NEXT: vpaddd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xd1]
1851 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1852 ; X64-NEXT: retq ## encoding: [0xc3]
1853 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1854 ret <16 x i32> %res
1855 }
1857 define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
1858 ; X86-LABEL: test_mask_add_epi32_rrkz:
1860 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1861 ; X86-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0xc1]
1862 ; X86-NEXT: retl ## encoding: [0xc3]
1864 ; X64-LABEL: test_mask_add_epi32_rrkz:
1866 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1867 ; X64-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0xc1]
1868 ; X64-NEXT: retq ## encoding: [0xc3]
1869 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1870 ret <16 x i32> %res
1871 }
1873 define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
1874 ; X86-LABEL: test_mask_add_epi32_rm:
1876 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
1877 ; X86-NEXT: vpaddd (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0x00]
1878 ; X86-NEXT: retl ## encoding: [0xc3]
1880 ; X64-LABEL: test_mask_add_epi32_rm:
1882 ; X64-NEXT: vpaddd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0x07]
1883 ; X64-NEXT: retq ## encoding: [0xc3]
1884 %b = load <16 x i32>, <16 x i32>* %ptr_b
1885 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1886 ret <16 x i32> %res
1887 }
1889 define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1890 ; X86-LABEL: test_mask_add_epi32_rmk:
1892 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
1893 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1894 ; X86-NEXT: vpaddd (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0x08]
1895 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1896 ; X86-NEXT: retl ## encoding: [0xc3]
1898 ; X64-LABEL: test_mask_add_epi32_rmk:
1900 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1901 ; X64-NEXT: vpaddd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0x0f]
1902 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1903 ; X64-NEXT: retq ## encoding: [0xc3]
1904 %b = load <16 x i32>, <16 x i32>* %ptr_b
1905 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1906 ret <16 x i32> %res
1907 }
1909 define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
1910 ; X86-LABEL: test_mask_add_epi32_rmkz:
1912 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
1913 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1914 ; X86-NEXT: vpaddd (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0x00]
1915 ; X86-NEXT: retl ## encoding: [0xc3]
1917 ; X64-LABEL: test_mask_add_epi32_rmkz:
1919 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1920 ; X64-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0x07]
1921 ; X64-NEXT: retq ## encoding: [0xc3]
1922 %b = load <16 x i32>, <16 x i32>* %ptr_b
1923 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1924 ret <16 x i32> %res
1925 }
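; The rmb forms load a single i32 and splat it, which folds into the {1to16} embedded-broadcast form of vpaddd (note the EVEX.b bit flipping 0x48 -> 0x58 in the encodings).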
1927 define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
1928 ; X86-LABEL: test_mask_add_epi32_rmb:
1930 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
1931 ; X86-NEXT: vpaddd (%eax){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfe,0x00]
1932 ; X86-NEXT: retl ## encoding: [0xc3]
1934 ; X64-LABEL: test_mask_add_epi32_rmb:
1936 ; X64-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfe,0x07]
1937 ; X64-NEXT: retq ## encoding: [0xc3]
1938 %q = load i32, i32* %ptr_b
1939 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1940 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1941 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1942 ret <16 x i32> %res
1943 }
1945 define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1946 ; X86-LABEL: test_mask_add_epi32_rmbk:
1948 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
1949 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1950 ; X86-NEXT: vpaddd (%eax){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfe,0x08]
1951 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1952 ; X86-NEXT: retl ## encoding: [0xc3]
1954 ; X64-LABEL: test_mask_add_epi32_rmbk:
1956 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1957 ; X64-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfe,0x0f]
1958 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1959 ; X64-NEXT: retq ## encoding: [0xc3]
1960 %q = load i32, i32* %ptr_b
1961 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1962 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1963 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1964 ret <16 x i32> %res
1965 }
1967 define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
1968 ; X86-LABEL: test_mask_add_epi32_rmbkz:
1970 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
1971 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1972 ; X86-NEXT: vpaddd (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfe,0x00]
1973 ; X86-NEXT: retl ## encoding: [0xc3]
1975 ; X64-LABEL: test_mask_add_epi32_rmbkz:
1977 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1978 ; X64-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfe,0x07]
1979 ; X64-NEXT: retq ## encoding: [0xc3]
1980 %q = load i32, i32* %ptr_b
1981 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1982 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1983 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1984 ret <16 x i32> %res
1985 }
1987 declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1989 define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
1990 ; CHECK-LABEL: test_mask_sub_epi32_rr:
1992 ; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc1]
1993 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
1994 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1995 ret <16 x i32> %res
1996 }
1998 define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1999 ; X86-LABEL: test_mask_sub_epi32_rrk:
2001 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2002 ; X86-NEXT: vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1]
2003 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2004 ; X86-NEXT: retl ## encoding: [0xc3]
2006 ; X64-LABEL: test_mask_sub_epi32_rrk:
2008 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2009 ; X64-NEXT: vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1]
2010 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2011 ; X64-NEXT: retq ## encoding: [0xc3]
2012 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2013 ret <16 x i32> %res
2014 }
2016 define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
2017 ; X86-LABEL: test_mask_sub_epi32_rrkz:
2019 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2020 ; X86-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1]
2021 ; X86-NEXT: retl ## encoding: [0xc3]
2023 ; X64-LABEL: test_mask_sub_epi32_rrkz:
2025 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2026 ; X64-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1]
2027 ; X64-NEXT: retq ## encoding: [0xc3]
2028 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2029 ret <16 x i32> %res
2030 }
2032 define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
2033 ; X86-LABEL: test_mask_sub_epi32_rm:
2035 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2036 ; X86-NEXT: vpsubd (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x00]
2037 ; X86-NEXT: retl ## encoding: [0xc3]
2039 ; X64-LABEL: test_mask_sub_epi32_rm:
2041 ; X64-NEXT: vpsubd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x07]
2042 ; X64-NEXT: retq ## encoding: [0xc3]
2043 %b = load <16 x i32>, <16 x i32>* %ptr_b
2044 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2045 ret < 16 x i32> %res
2048 define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2049 ; X86-LABEL: test_mask_sub_epi32_rmk:
2051 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2052 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2053 ; X86-NEXT: vpsubd (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0x08]
2054 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2055 ; X86-NEXT: retl ## encoding: [0xc3]
2057 ; X64-LABEL: test_mask_sub_epi32_rmk:
2059 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2060 ; X64-NEXT: vpsubd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0x0f]
2061 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2062 ; X64-NEXT: retq ## encoding: [0xc3]
2063 %b = load <16 x i32>, <16 x i32>* %ptr_b
2064 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2065 ret < 16 x i32> %res
2068 define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
2069 ; X86-LABEL: test_mask_sub_epi32_rmkz:
2071 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2072 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2073 ; X86-NEXT: vpsubd (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0x00]
2074 ; X86-NEXT: retl ## encoding: [0xc3]
2076 ; X64-LABEL: test_mask_sub_epi32_rmkz:
2078 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2079 ; X64-NEXT: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0x07]
2080 ; X64-NEXT: retq ## encoding: [0xc3]
2081 %b = load <16 x i32>, <16 x i32>* %ptr_b
2082 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2083 ret < 16 x i32> %res
2086 define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
2087 ; X86-LABEL: test_mask_sub_epi32_rmb:
2089 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2090 ; X86-NEXT: vpsubd (%eax){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfa,0x00]
2091 ; X86-NEXT: retl ## encoding: [0xc3]
2093 ; X64-LABEL: test_mask_sub_epi32_rmb:
2095 ; X64-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfa,0x07]
2096 ; X64-NEXT: retq ## encoding: [0xc3]
2097 %q = load i32, i32* %ptr_b
2098 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2099 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2100 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2101 ret < 16 x i32> %res
2104 define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2105 ; X86-LABEL: test_mask_sub_epi32_rmbk:
2107 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2108 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2109 ; X86-NEXT: vpsubd (%eax){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfa,0x08]
2110 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2111 ; X86-NEXT: retl ## encoding: [0xc3]
2113 ; X64-LABEL: test_mask_sub_epi32_rmbk:
2115 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2116 ; X64-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfa,0x0f]
2117 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2118 ; X64-NEXT: retq ## encoding: [0xc3]
2119 %q = load i32, i32* %ptr_b
2120 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2121 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2122 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2123 ret < 16 x i32> %res
2126 define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
2127 ; X86-LABEL: test_mask_sub_epi32_rmbkz:
2129 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2130 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2131 ; X86-NEXT: vpsubd (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfa,0x00]
2132 ; X86-NEXT: retl ## encoding: [0xc3]
2134 ; X64-LABEL: test_mask_sub_epi32_rmbkz:
2136 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2137 ; X64-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfa,0x07]
2138 ; X64-NEXT: retq ## encoding: [0xc3]
2139 %q = load i32, i32* %ptr_b
2140 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2141 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2142 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2143 ret < 16 x i32> %res
2146 declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
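; NOTE: like the other legacy masked arithmetic intrinsics exercised here,
; @llvm.x86.avx512.mask.psub.d.512 carries its own pass-through and mask
; operands. A minimal sketch of the IR the autoupgrader is expected to turn
; a masked call into, assuming the usual binop-plus-select upgrade path
; (%sub, %mbit and %sel are illustrative names, not taken from this file):
;
;   %sub  = sub <16 x i32> %a, %b
;   %mbit = bitcast i16 %mask to <16 x i1>
;   %sel  = select <16 x i1> %mbit, <16 x i32> %sub, <16 x i32> %passThru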
2148 define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
2149 ; CHECK-LABEL: test_mask_add_epi64_rr:
2151 ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1]
2152 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
2153 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2154   ret < 8 x i64> %res
2157 define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
2158 ; X86-LABEL: test_mask_add_epi64_rrk:
2160 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
2161 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
2162 ; X86-NEXT: vpaddq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xd1]
2163 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2164 ; X86-NEXT: retl ## encoding: [0xc3]
2166 ; X64-LABEL: test_mask_add_epi64_rrk:
2168 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2169 ; X64-NEXT: vpaddq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xd1]
2170 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2171 ; X64-NEXT: retq ## encoding: [0xc3]
2172 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2173   ret < 8 x i64> %res
2176 define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
2177 ; X86-LABEL: test_mask_add_epi64_rrkz:
2179 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
2180 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
2181 ; X86-NEXT: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
2182 ; X86-NEXT: retl ## encoding: [0xc3]
2184 ; X64-LABEL: test_mask_add_epi64_rrkz:
2186 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2187 ; X64-NEXT: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
2188 ; X64-NEXT: retq ## encoding: [0xc3]
2189 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2190   ret < 8 x i64> %res
2193 define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
2194 ; X86-LABEL: test_mask_add_epi64_rm:
2196 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2197 ; X86-NEXT: vpaddq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0x00]
2198 ; X86-NEXT: retl ## encoding: [0xc3]
2200 ; X64-LABEL: test_mask_add_epi64_rm:
2202 ; X64-NEXT: vpaddq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0x07]
2203 ; X64-NEXT: retq ## encoding: [0xc3]
2204 %b = load <8 x i64>, <8 x i64>* %ptr_b
2205 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2206   ret < 8 x i64> %res
2209 define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2210 ; X86-LABEL: test_mask_add_epi64_rmk:
2212 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2213 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2214 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
2215 ; X86-NEXT: vpaddq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0x08]
2216 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2217 ; X86-NEXT: retl ## encoding: [0xc3]
2219 ; X64-LABEL: test_mask_add_epi64_rmk:
2221 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2222 ; X64-NEXT: vpaddq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0x0f]
2223 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2224 ; X64-NEXT: retq ## encoding: [0xc3]
2225 %b = load <8 x i64>, <8 x i64>* %ptr_b
2226 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2227   ret < 8 x i64> %res
2230 define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
2231 ; X86-LABEL: test_mask_add_epi64_rmkz:
2233 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2234 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2235 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
2236 ; X86-NEXT: vpaddq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0x00]
2237 ; X86-NEXT: retl ## encoding: [0xc3]
2239 ; X64-LABEL: test_mask_add_epi64_rmkz:
2241 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2242 ; X64-NEXT: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0x07]
2243 ; X64-NEXT: retq ## encoding: [0xc3]
2244 %b = load <8 x i64>, <8 x i64>* %ptr_b
2245 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2246   ret < 8 x i64> %res
2249 define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
2250 ; X86-LABEL: test_mask_add_epi64_rmb:
2252 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2253 ; X86-NEXT: vpaddq (%eax){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xd4,0x00]
2254 ; X86-NEXT: retl ## encoding: [0xc3]
2256 ; X64-LABEL: test_mask_add_epi64_rmb:
2258 ; X64-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xd4,0x07]
2259 ; X64-NEXT: retq ## encoding: [0xc3]
2260 %q = load i64, i64* %ptr_b
2261 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2262 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2263 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2264   ret < 8 x i64> %res
2267 define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2268 ; X86-LABEL: test_mask_add_epi64_rmbk:
2270 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2271 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2272 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
2273 ; X86-NEXT: vpaddq (%eax){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xd4,0x08]
2274 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2275 ; X86-NEXT: retl ## encoding: [0xc3]
2277 ; X64-LABEL: test_mask_add_epi64_rmbk:
2279 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2280 ; X64-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xd4,0x0f]
2281 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2282 ; X64-NEXT: retq ## encoding: [0xc3]
2283 %q = load i64, i64* %ptr_b
2284 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2285 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2286 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2287   ret < 8 x i64> %res
2290 define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
2291 ; X86-LABEL: test_mask_add_epi64_rmbkz:
2293 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2294 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2295 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
2296 ; X86-NEXT: vpaddq (%eax){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xd4,0x00]
2297 ; X86-NEXT: retl ## encoding: [0xc3]
2299 ; X64-LABEL: test_mask_add_epi64_rmbkz:
2301 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2302 ; X64-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xd4,0x07]
2303 ; X64-NEXT: retq ## encoding: [0xc3]
2304 %q = load i64, i64* %ptr_b
2305 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2306 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2307 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2308   ret < 8 x i64> %res
2311 declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
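; With 8 x i64 vectors the mask narrows to i8, and plain AVX512F has no
; byte-sized k-register move (KMOVB needs AVX512DQ), so the 32-bit RUN line
; zero-extends the stack byte first (movzbl) and then uses kmovw; the qword
; operations under {%k1} only consume the low 8 mask bits. The 64-bit RUN
; line can kmovw %edi directly because the i8 argument already arrives in a
; GPR.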
2313 define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
2314 ; CHECK-LABEL: test_mask_sub_epi64_rr:
2316 ; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc1]
2317 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
2318 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2319   ret < 8 x i64> %res
2322 define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
2323 ; X86-LABEL: test_mask_sub_epi64_rrk:
2325 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
2326 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
2327 ; X86-NEXT: vpsubq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xd1]
2328 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2329 ; X86-NEXT: retl ## encoding: [0xc3]
2331 ; X64-LABEL: test_mask_sub_epi64_rrk:
2333 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2334 ; X64-NEXT: vpsubq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xd1]
2335 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2336 ; X64-NEXT: retq ## encoding: [0xc3]
2337 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2338   ret < 8 x i64> %res
2341 define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
2342 ; X86-LABEL: test_mask_sub_epi64_rrkz:
2344 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
2345 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
2346 ; X86-NEXT: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
2347 ; X86-NEXT: retl ## encoding: [0xc3]
2349 ; X64-LABEL: test_mask_sub_epi64_rrkz:
2351 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2352 ; X64-NEXT: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
2353 ; X64-NEXT: retq ## encoding: [0xc3]
2354 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2355   ret < 8 x i64> %res
2358 define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
2359 ; X86-LABEL: test_mask_sub_epi64_rm:
2361 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2362 ; X86-NEXT: vpsubq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0x00]
2363 ; X86-NEXT: retl ## encoding: [0xc3]
2365 ; X64-LABEL: test_mask_sub_epi64_rm:
2367 ; X64-NEXT: vpsubq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0x07]
2368 ; X64-NEXT: retq ## encoding: [0xc3]
2369 %b = load <8 x i64>, <8 x i64>* %ptr_b
2370 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2371   ret < 8 x i64> %res
2374 define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2375 ; X86-LABEL: test_mask_sub_epi64_rmk:
2377 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2378 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2379 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
2380 ; X86-NEXT: vpsubq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0x08]
2381 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2382 ; X86-NEXT: retl ## encoding: [0xc3]
2384 ; X64-LABEL: test_mask_sub_epi64_rmk:
2386 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2387 ; X64-NEXT: vpsubq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0x0f]
2388 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2389 ; X64-NEXT: retq ## encoding: [0xc3]
2390 %b = load <8 x i64>, <8 x i64>* %ptr_b
2391 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2392   ret < 8 x i64> %res
2395 define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
2396 ; X86-LABEL: test_mask_sub_epi64_rmkz:
2398 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2399 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2400 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
2401 ; X86-NEXT: vpsubq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0x00]
2402 ; X86-NEXT: retl ## encoding: [0xc3]
2404 ; X64-LABEL: test_mask_sub_epi64_rmkz:
2406 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2407 ; X64-NEXT: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0x07]
2408 ; X64-NEXT: retq ## encoding: [0xc3]
2409 %b = load <8 x i64>, <8 x i64>* %ptr_b
2410 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2411   ret < 8 x i64> %res
2414 define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
2415 ; X86-LABEL: test_mask_sub_epi64_rmb:
2417 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2418 ; X86-NEXT: vpsubq (%eax){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xfb,0x00]
2419 ; X86-NEXT: retl ## encoding: [0xc3]
2421 ; X64-LABEL: test_mask_sub_epi64_rmb:
2423 ; X64-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xfb,0x07]
2424 ; X64-NEXT: retq ## encoding: [0xc3]
2425 %q = load i64, i64* %ptr_b
2426 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2427 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2428 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2429   ret < 8 x i64> %res
2432 define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2433 ; X86-LABEL: test_mask_sub_epi64_rmbk:
2435 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2436 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2437 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
2438 ; X86-NEXT: vpsubq (%eax){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xfb,0x08]
2439 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2440 ; X86-NEXT: retl ## encoding: [0xc3]
2442 ; X64-LABEL: test_mask_sub_epi64_rmbk:
2444 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2445 ; X64-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xfb,0x0f]
2446 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2447 ; X64-NEXT: retq ## encoding: [0xc3]
2448 %q = load i64, i64* %ptr_b
2449 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2450 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2451 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2452   ret < 8 x i64> %res
2455 define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
2456 ; X86-LABEL: test_mask_sub_epi64_rmbkz:
2458 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2459 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2460 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
2461 ; X86-NEXT: vpsubq (%eax){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xfb,0x00]
2462 ; X86-NEXT: retl ## encoding: [0xc3]
2464 ; X64-LABEL: test_mask_sub_epi64_rmbkz:
2466 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2467 ; X64-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xfb,0x07]
2468 ; X64-NEXT: retq ## encoding: [0xc3]
2469 %q = load i64, i64* %ptr_b
2470 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2471 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2472 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2473   ret < 8 x i64> %res
2476 declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2478 define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
2479 ; CHECK-LABEL: test_mask_mullo_epi32_rr_512:
2481 ; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0xc1]
2482 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
2483 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2484   ret < 16 x i32> %res
2487 define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2488 ; X86-LABEL: test_mask_mullo_epi32_rrk_512:
2490 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2491 ; X86-NEXT: vpmulld %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0xd1]
2492 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2493 ; X86-NEXT: retl ## encoding: [0xc3]
2495 ; X64-LABEL: test_mask_mullo_epi32_rrk_512:
2497 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2498 ; X64-NEXT: vpmulld %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0xd1]
2499 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2500 ; X64-NEXT: retq ## encoding: [0xc3]
2501 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2502 ret < 16 x i32> %res
2505 define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
2506 ; X86-LABEL: test_mask_mullo_epi32_rrkz_512:
2508 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2509 ; X86-NEXT: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0xc1]
2510 ; X86-NEXT: retl ## encoding: [0xc3]
2512 ; X64-LABEL: test_mask_mullo_epi32_rrkz_512:
2514 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2515 ; X64-NEXT: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0xc1]
2516 ; X64-NEXT: retq ## encoding: [0xc3]
2517 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2518 ret < 16 x i32> %res
2521 define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
2522 ; X86-LABEL: test_mask_mullo_epi32_rm_512:
2524 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2525 ; X86-NEXT: vpmulld (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0x00]
2526 ; X86-NEXT: retl ## encoding: [0xc3]
2528 ; X64-LABEL: test_mask_mullo_epi32_rm_512:
2530 ; X64-NEXT: vpmulld (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0x07]
2531 ; X64-NEXT: retq ## encoding: [0xc3]
2532 %b = load <16 x i32>, <16 x i32>* %ptr_b
2533 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2534 ret < 16 x i32> %res
2537 define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2538 ; X86-LABEL: test_mask_mullo_epi32_rmk_512:
2540 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2541 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2542 ; X86-NEXT: vpmulld (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0x08]
2543 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2544 ; X86-NEXT: retl ## encoding: [0xc3]
2546 ; X64-LABEL: test_mask_mullo_epi32_rmk_512:
2548 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2549 ; X64-NEXT: vpmulld (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0x0f]
2550 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2551 ; X64-NEXT: retq ## encoding: [0xc3]
2552 %b = load <16 x i32>, <16 x i32>* %ptr_b
2553 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2554 ret < 16 x i32> %res
2557 define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
2558 ; X86-LABEL: test_mask_mullo_epi32_rmkz_512:
2560 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2561 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2562 ; X86-NEXT: vpmulld (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0x00]
2563 ; X86-NEXT: retl ## encoding: [0xc3]
2565 ; X64-LABEL: test_mask_mullo_epi32_rmkz_512:
2567 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2568 ; X64-NEXT: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0x07]
2569 ; X64-NEXT: retq ## encoding: [0xc3]
2570 %b = load <16 x i32>, <16 x i32>* %ptr_b
2571 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2572 ret < 16 x i32> %res
2575 define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
2576 ; X86-LABEL: test_mask_mullo_epi32_rmb_512:
2578 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2579 ; X86-NEXT: vpmulld (%eax){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x58,0x40,0x00]
2580 ; X86-NEXT: retl ## encoding: [0xc3]
2582 ; X64-LABEL: test_mask_mullo_epi32_rmb_512:
2584 ; X64-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x58,0x40,0x07]
2585 ; X64-NEXT: retq ## encoding: [0xc3]
2586 %q = load i32, i32* %ptr_b
2587 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2588 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2589 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2590 ret < 16 x i32> %res
2593 define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2594 ; X86-LABEL: test_mask_mullo_epi32_rmbk_512:
2596 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2597 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2598 ; X86-NEXT: vpmulld (%eax){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0x40,0x08]
2599 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2600 ; X86-NEXT: retl ## encoding: [0xc3]
2602 ; X64-LABEL: test_mask_mullo_epi32_rmbk_512:
2604 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2605 ; X64-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0x40,0x0f]
2606 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2607 ; X64-NEXT: retq ## encoding: [0xc3]
2608 %q = load i32, i32* %ptr_b
2609 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2610 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2611 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2612 ret < 16 x i32> %res
2615 define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
2616 ; X86-LABEL: test_mask_mullo_epi32_rmbkz_512:
2618 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2619 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2620 ; X86-NEXT: vpmulld (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xd9,0x40,0x00]
2621 ; X86-NEXT: retl ## encoding: [0xc3]
2623 ; X64-LABEL: test_mask_mullo_epi32_rmbkz_512:
2625 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2626 ; X64-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xd9,0x40,0x07]
2627 ; X64-NEXT: retq ## encoding: [0xc3]
2628 %q = load i32, i32* %ptr_b
2629 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2630 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2631 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2632 ret < 16 x i32> %res
2635 declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
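; "mullo" keeps the low 32 bits of each product, which is exactly what
; VPMULLD computes and what a plain wrapping `mul <16 x i32>` yields in IR,
; so these masked calls reduce to mul plus the mask select in the same way
; as the add/sub sketch above.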
2638 declare <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float>, <16 x float>, i32, <16 x float>, i16)
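; For VSHUFF32X4/VSHUFI32X4 the $22 immediate decodes as four 2-bit fields,
; low bits first (22 = 0b00010110, i.e. lanes 2,1,1,0), each picking one
; 128-bit lane: the first two fields index the first source and the last
; two the second, giving the zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
; comments the generator prints below. The 64x2 forms read the same fields
; but shuffle 2-qword lanes, which is why the unmasked shuffles below are
; canonicalized to vshuff64x2/vshufi64x2 with an equivalent pattern.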
2640 define <16 x float>@test_int_x86_avx512_mask_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
2641 ; X86-LABEL: test_int_x86_avx512_mask_shuf_f32x4:
2643 ; X86-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x23,0xd9,0x16]
2644 ; X86-NEXT: ## zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
2645 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2646 ; X86-NEXT: vshuff32x4 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x23,0xd1,0x16]
2647 ; X86-NEXT: ## zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
2648 ; X86-NEXT: vaddps %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc3]
2649 ; X86-NEXT: retl ## encoding: [0xc3]
2651 ; X64-LABEL: test_int_x86_avx512_mask_shuf_f32x4:
2653 ; X64-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x23,0xd9,0x16]
2654 ; X64-NEXT: ## zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
2655 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2656 ; X64-NEXT: vshuff32x4 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x23,0xd1,0x16]
2657 ; X64-NEXT: ## zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
2658 ; X64-NEXT: vaddps %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc3]
2659 ; X64-NEXT: retq ## encoding: [0xc3]
2660 %res = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
2661 %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
2662 %res2 = fadd <16 x float> %res, %res1
2663 ret <16 x float> %res2
2666 declare <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double>, <8 x double>, i32, <8 x double>, i8)
2668 define <8 x double>@test_int_x86_avx512_mask_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
2669 ; X86-LABEL: test_int_x86_avx512_mask_shuf_f64x2:
2671 ; X86-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x23,0xd9,0x16]
2672 ; X86-NEXT: ## zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
2673 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
2674 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
2675 ; X86-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x23,0xd1,0x16]
2676 ; X86-NEXT: ## zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
2677 ; X86-NEXT: vaddpd %zmm3, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xd3]
2678 ; X86-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x23,0xc1,0x16]
2679 ; X86-NEXT: ## zmm0 {%k1} {z} = zmm0[4,5,2,3],zmm1[2,3,0,1]
2680 ; X86-NEXT: vaddpd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
2681 ; X86-NEXT: retl ## encoding: [0xc3]
2683 ; X64-LABEL: test_int_x86_avx512_mask_shuf_f64x2:
2685 ; X64-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x23,0xd9,0x16]
2686 ; X64-NEXT: ## zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
2687 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2688 ; X64-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x23,0xd1,0x16]
2689 ; X64-NEXT: ## zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
2690 ; X64-NEXT: vaddpd %zmm3, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xd3]
2691 ; X64-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x23,0xc1,0x16]
2692 ; X64-NEXT: ## zmm0 {%k1} {z} = zmm0[4,5,2,3],zmm1[2,3,0,1]
2693 ; X64-NEXT: vaddpd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
2694 ; X64-NEXT: retq ## encoding: [0xc3]
2695 %res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
2696 %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
2697 %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
2699 %res3 = fadd <8 x double> %res, %res1
2700 %res4 = fadd <8 x double> %res3, %res2
2701 ret <8 x double> %res4
2704 declare <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
2706 define <16 x i32>@test_int_x86_avx512_mask_shuf_i32x4(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
2707 ; X86-LABEL: test_int_x86_avx512_mask_shuf_i32x4:
2709 ; X86-NEXT: vshufi64x2 $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x43,0xd9,0x16]
2710 ; X86-NEXT: ## zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
2711 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2712 ; X86-NEXT: vshufi32x4 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x43,0xd1,0x16]
2713 ; X86-NEXT: ## zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
2714 ; X86-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
2715 ; X86-NEXT: retl ## encoding: [0xc3]
2717 ; X64-LABEL: test_int_x86_avx512_mask_shuf_i32x4:
2719 ; X64-NEXT: vshufi64x2 $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x43,0xd9,0x16]
2720 ; X64-NEXT: ## zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
2721 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2722 ; X64-NEXT: vshufi32x4 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x43,0xd1,0x16]
2723 ; X64-NEXT: ## zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
2724 ; X64-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
2725 ; X64-NEXT: retq ## encoding: [0xc3]
2726 %res = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
2727 %res1 = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1)
2728 %res2 = add <16 x i32> %res, %res1
2729 ret <16 x i32> %res2
2732 declare <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
2734 define <8 x i64>@test_int_x86_avx512_mask_shuf_i64x2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
2735 ; X86-LABEL: test_int_x86_avx512_mask_shuf_i64x2:
2737 ; X86-NEXT: vshufi64x2 $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x43,0xd9,0x16]
2738 ; X86-NEXT: ## zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
2739 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
2740 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
2741 ; X86-NEXT: vshufi64x2 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x43,0xd1,0x16]
2742 ; X86-NEXT: ## zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
2743 ; X86-NEXT: vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
2744 ; X86-NEXT: retl ## encoding: [0xc3]
2746 ; X64-LABEL: test_int_x86_avx512_mask_shuf_i64x2:
2748 ; X64-NEXT: vshufi64x2 $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x43,0xd9,0x16]
2749 ; X64-NEXT: ## zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
2750 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2751 ; X64-NEXT: vshufi64x2 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x43,0xd1,0x16]
2752 ; X64-NEXT: ## zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
2753 ; X64-NEXT: vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
2754 ; X64-NEXT: retq ## encoding: [0xc3]
2755 %res = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
2756 %res1 = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1)
2757 %res2 = add <8 x i64> %res, %res1
2758   ret <8 x i64> %res2
2761 declare <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8)
2763 define <8 x double>@test_int_x86_avx512_mask_shuf_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
2764 ; X86-LABEL: test_int_x86_avx512_mask_shuf_pd_512:
2766 ; X86-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0xc6,0xd9,0x16]
2767 ; X86-NEXT: ## zmm3 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
2768 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
2769 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
2770 ; X86-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc6,0xd1,0x16]
2771 ; X86-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
2772 ; X86-NEXT: vaddpd %zmm3, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xd3]
2773 ; X86-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xc6,0xc1,0x16]
2774 ; X86-NEXT: ## zmm0 {%k1} {z} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
2775 ; X86-NEXT: vaddpd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
2776 ; X86-NEXT: retl ## encoding: [0xc3]
2778 ; X64-LABEL: test_int_x86_avx512_mask_shuf_pd_512:
2780 ; X64-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0xc6,0xd9,0x16]
2781 ; X64-NEXT: ## zmm3 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
2782 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2783 ; X64-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc6,0xd1,0x16]
2784 ; X64-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
2785 ; X64-NEXT: vaddpd %zmm3, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xd3]
2786 ; X64-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xc6,0xc1,0x16]
2787 ; X64-NEXT: ## zmm0 {%k1} {z} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
2788 ; X64-NEXT: vaddpd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
2789 ; X64-NEXT: retq ## encoding: [0xc3]
2790 %res = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
2791 %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
2792 %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
2794 %res3 = fadd <8 x double> %res, %res1
2795 %res4 = fadd <8 x double> %res3, %res2
2796 ret <8 x double> %res4
2799 declare <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16)
2801 define <16 x float>@test_int_x86_avx512_mask_shuf_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
2802 ; X86-LABEL: test_int_x86_avx512_mask_shuf_ps_512:
2804 ; X86-NEXT: vshufps $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0xc6,0xd9,0x16]
2805 ; X86-NEXT: ## zmm3 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
2806 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2807 ; X86-NEXT: vshufps $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc6,0xd1,0x16]
2808 ; X86-NEXT: ## zmm2 {%k1} = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
2809 ; X86-NEXT: vaddps %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc3]
2810 ; X86-NEXT: retl ## encoding: [0xc3]
2812 ; X64-LABEL: test_int_x86_avx512_mask_shuf_ps_512:
2814 ; X64-NEXT: vshufps $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0xc6,0xd9,0x16]
2815 ; X64-NEXT: ## zmm3 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
2816 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2817 ; X64-NEXT: vshufps $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc6,0xd1,0x16]
2818 ; X64-NEXT: ## zmm2 {%k1} = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
2819 ; X64-NEXT: vaddps %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc3]
2820 ; X64-NEXT: retq ## encoding: [0xc3]
2821 %res = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
2822 %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
2823 %res2 = fadd <16 x float> %res, %res1
2824 ret <16 x float> %res2
2827 declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
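; The masked min/max intrinsics below add nothing beyond compare-and-pick:
; a masked smax call is equivalent to (a sketch; %c and %m are illustrative
; names)
;
;   %c = icmp sgt <16 x i32> %x0, %x1
;   %m = select <16 x i1> %c, <16 x i32> %x0, <16 x i32> %x1
;
; followed by the usual select between %m and the %x2 pass-through on the
; %x3 mask bits.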
2829 define <16 x i32>@test_int_x86_avx512_mask_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2830 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_d_512:
2832 ; X86-NEXT: vpmaxsd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xd9]
2833 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2834 ; X86-NEXT: vpmaxsd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3d,0xd1]
2835 ; X86-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
2836 ; X86-NEXT: retl ## encoding: [0xc3]
2838 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_d_512:
2840 ; X64-NEXT: vpmaxsd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xd9]
2841 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2842 ; X64-NEXT: vpmaxsd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3d,0xd1]
2843 ; X64-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
2844 ; X64-NEXT: retq ## encoding: [0xc3]
2845 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2846 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
2847 %res2 = add <16 x i32> %res, %res1
2848 ret <16 x i32> %res2
2851 declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2853 define <8 x i64>@test_int_x86_avx512_mask_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
2854 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_q_512:
2856 ; X86-NEXT: vpmaxsq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xd9]
2857 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
2858 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
2859 ; X86-NEXT: vpmaxsq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3d,0xd1]
2860 ; X86-NEXT: vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
2861 ; X86-NEXT: retl ## encoding: [0xc3]
2863 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_q_512:
2865 ; X64-NEXT: vpmaxsq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xd9]
2866 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2867 ; X64-NEXT: vpmaxsq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3d,0xd1]
2868 ; X64-NEXT: vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
2869 ; X64-NEXT: retq ## encoding: [0xc3]
2870 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
2871 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
2872 %res2 = add <8 x i64> %res, %res1
2873   ret <8 x i64> %res2
2876 declare <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2878 define <16 x i32>@test_int_x86_avx512_mask_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2879 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_d_512:
2881 ; X86-NEXT: vpmaxud %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x3f,0xd9]
2882 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2883 ; X86-NEXT: vpmaxud %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3f,0xd1]
2884 ; X86-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
2885 ; X86-NEXT: retl ## encoding: [0xc3]
2887 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_d_512:
2889 ; X64-NEXT: vpmaxud %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x3f,0xd9]
2890 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2891 ; X64-NEXT: vpmaxud %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3f,0xd1]
2892 ; X64-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
2893 ; X64-NEXT: retq ## encoding: [0xc3]
2894 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2895 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
2896 %res2 = add <16 x i32> %res, %res1
2897 ret <16 x i32> %res2
2900 declare <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2902 define <8 x i64>@test_int_x86_avx512_mask_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
2903 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_q_512:
2905 ; X86-NEXT: vpmaxuq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x3f,0xd9]
2906 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
2907 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
2908 ; X86-NEXT: vpmaxuq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3f,0xd1]
2909 ; X86-NEXT: vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
2910 ; X86-NEXT: retl ## encoding: [0xc3]
2912 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_q_512:
2914 ; X64-NEXT: vpmaxuq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x3f,0xd9]
2915 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2916 ; X64-NEXT: vpmaxuq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3f,0xd1]
2917 ; X64-NEXT: vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
2918 ; X64-NEXT: retq ## encoding: [0xc3]
2919 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
2920 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
2921 %res2 = add <8 x i64> %res, %res1
2922   ret <8 x i64> %res2
2925 declare <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2927 define <16 x i32>@test_int_x86_avx512_mask_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2928 ; X86-LABEL: test_int_x86_avx512_mask_pmins_d_512:
2930 ; X86-NEXT: vpminsd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x39,0xd9]
2931 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2932 ; X86-NEXT: vpminsd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x39,0xd1]
2933 ; X86-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
2934 ; X86-NEXT: retl ## encoding: [0xc3]
2936 ; X64-LABEL: test_int_x86_avx512_mask_pmins_d_512:
2938 ; X64-NEXT: vpminsd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x39,0xd9]
2939 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2940 ; X64-NEXT: vpminsd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x39,0xd1]
2941 ; X64-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
2942 ; X64-NEXT: retq ## encoding: [0xc3]
2943 %res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2944 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
2945 %res2 = add <16 x i32> %res, %res1
2946 ret <16 x i32> %res2
2949 declare <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2951 define <8 x i64>@test_int_x86_avx512_mask_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
2952 ; X86-LABEL: test_int_x86_avx512_mask_pmins_q_512:
2954 ; X86-NEXT: vpminsq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x39,0xd9]
2955 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
2956 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
2957 ; X86-NEXT: vpminsq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x39,0xd1]
2958 ; X86-NEXT: vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
2959 ; X86-NEXT: retl ## encoding: [0xc3]
2961 ; X64-LABEL: test_int_x86_avx512_mask_pmins_q_512:
2963 ; X64-NEXT: vpminsq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x39,0xd9]
2964 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2965 ; X64-NEXT: vpminsq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x39,0xd1]
2966 ; X64-NEXT: vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
2967 ; X64-NEXT: retq ## encoding: [0xc3]
2968 %res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
2969 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
2970 %res2 = add <8 x i64> %res, %res1
2971   ret <8 x i64> %res2
2974 declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2976 define <16 x i32>@test_int_x86_avx512_mask_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2977 ; X86-LABEL: test_int_x86_avx512_mask_pminu_d_512:
2979 ; X86-NEXT: vpminud %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xd9]
2980 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2981 ; X86-NEXT: vpminud %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3b,0xd1]
2982 ; X86-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
2983 ; X86-NEXT: retl ## encoding: [0xc3]
2985 ; X64-LABEL: test_int_x86_avx512_mask_pminu_d_512:
2987 ; X64-NEXT: vpminud %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xd9]
2988 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
2989 ; X64-NEXT: vpminud %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3b,0xd1]
2990 ; X64-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
2991 ; X64-NEXT: retq ## encoding: [0xc3]
2992 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2993 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
2994 %res2 = add <16 x i32> %res, %res1
2995 ret <16 x i32> %res2
2998 declare <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3000 define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3001 ; X86-LABEL: test_int_x86_avx512_mask_pminu_q_512:
3003 ; X86-NEXT: vpminuq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x3b,0xd9]
3004 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
3005 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
3006 ; X86-NEXT: vpminuq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3b,0xd1]
3007 ; X86-NEXT: vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
3008 ; X86-NEXT: retl ## encoding: [0xc3]
3010 ; X64-LABEL: test_int_x86_avx512_mask_pminu_q_512:
3012 ; X64-NEXT: vpminuq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x3b,0xd9]
3013 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3014 ; X64-NEXT: vpminuq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3b,0xd1]
3015 ; X64-NEXT: vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
3016 ; X64-NEXT: retq ## encoding: [0xc3]
3017 %res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3018 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3019 %res2 = add <8 x i64> %res, %res1
3020   ret <8 x i64> %res2
3023 define <4 x float> @test_mm_mask_move_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
3024 ; X86-LABEL: test_mm_mask_move_ss:
3025 ; X86: ## %bb.0: ## %entry
3026 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
3027 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
3028 ; X86-NEXT: vmovss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x10,0xc2]
3029 ; X86-NEXT: retl ## encoding: [0xc3]
3031 ; X64-LABEL: test_mm_mask_move_ss:
3032 ; X64: ## %bb.0: ## %entry
3033 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3034 ; X64-NEXT: vmovss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x10,0xc2]
3035 ; X64-NEXT: retq ## encoding: [0xc3]
3036 entry:
3037 %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U)
3038 ret <4 x float> %res
3042 define <4 x float> @test_mm_maskz_move_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
3043 ; X86-LABEL: test_mm_maskz_move_ss:
3044 ; X86: ## %bb.0: ## %entry
3045 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
3046 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
3047 ; X86-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc1]
3048 ; X86-NEXT: retl ## encoding: [0xc3]
3050 ; X64-LABEL: test_mm_maskz_move_ss:
3051 ; X64: ## %bb.0: ## %entry
3052 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3053 ; X64-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc1]
3054 ; X64-NEXT: retq ## encoding: [0xc3]
3055 entry:
3056 %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U)
3057 ret <4 x float> %res
3060 define <2 x double> @test_mm_mask_move_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
3061 ; X86-LABEL: test_mm_mask_move_sd:
3062 ; X86: ## %bb.0: ## %entry
3063 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
3064 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
3065 ; X86-NEXT: vmovsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x10,0xc2]
3066 ; X86-NEXT: retl ## encoding: [0xc3]
3068 ; X64-LABEL: test_mm_mask_move_sd:
3069 ; X64: ## %bb.0: ## %entry
3070 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3071 ; X64-NEXT: vmovsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x10,0xc2]
3072 ; X64-NEXT: retq ## encoding: [0xc3]
3073 entry:
3074 %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U)
3075 ret <2 x double> %res

define <2 x double> @test_mm_maskz_move_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_maskz_move_sd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_move_sd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
%res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U)
ret <2 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float>, <4 x float>, <4 x float>, i8)
declare <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double>, <2 x double>, <2 x double>, i8)
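
; Masked zero-extension tests. Each llvm.x86.avx512.mask.pmovzx* intrinsic is
; called three times (merge mask, zero mask, all-ones mask) and the results are
; added together, so the checks cover the plain, {%k1} and {%k1} {z} forms of
; the corresponding VPMOVZX* instruction.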

declare <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_pmovzxb_d_512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_d_512:
; X86: ## %bb.0:
; X86-NEXT: vpmovzxbd %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x31,0xd0]
; X86-NEXT: ## zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovzxbd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x31,0xc8]
; X86-NEXT: ## zmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; X86-NEXT: vpmovzxbd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x31,0xc0]
; X86-NEXT: ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; X86-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_d_512:
; X64: ## %bb.0:
; X64-NEXT: vpmovzxbd %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x31,0xd0]
; X64-NEXT: ## zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovzxbd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x31,0xc8]
; X64-NEXT: ## zmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; X64-NEXT: vpmovzxbd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x31,0xc0]
; X64-NEXT: ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; X64-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> zeroinitializer, i16 %x2)
%res2 = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 -1)
%res3 = add <16 x i32> %res, %res1
%res4 = add <16 x i32> %res3, %res2
ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pmovzxb_q_512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_q_512:
; X86: ## %bb.0:
; X86-NEXT: vpmovzxbq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x32,0xd0]
; X86-NEXT: ## zmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovzxbq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x32,0xc8]
; X86-NEXT: ## zmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT: vpmovzxbq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x32,0xc0]
; X86-NEXT: ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_q_512:
; X64: ## %bb.0:
; X64-NEXT: vpmovzxbq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x32,0xd0]
; X64-NEXT: ## zmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovzxbq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x32,0xc8]
; X64-NEXT: ## zmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT: vpmovzxbq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x32,0xc0]
; X64-NEXT: ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> zeroinitializer, i8 %x2)
%res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 -1)
%res3 = add <8 x i64> %res, %res1
%res4 = add <8 x i64> %res3, %res2
ret <8 x i64> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pmovzxd_q_512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxd_q_512:
; X86: ## %bb.0:
; X86-NEXT: vpmovzxdq %ymm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x35,0xd0]
; X86-NEXT: ## zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovzxdq %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x35,0xc8]
; X86-NEXT: ## zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; X86-NEXT: vpmovzxdq %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x35,0xc0]
; X86-NEXT: ## zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxd_q_512:
; X64: ## %bb.0:
; X64-NEXT: vpmovzxdq %ymm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x35,0xd0]
; X64-NEXT: ## zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovzxdq %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x35,0xc8]
; X64-NEXT: ## zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; X64-NEXT: vpmovzxdq %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x35,0xc0]
; X64-NEXT: ## zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> zeroinitializer, i8 %x2)
%res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 -1)
%res3 = add <8 x i64> %res, %res1
%res4 = add <8 x i64> %res3, %res2
ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_pmovzxw_d_512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_d_512:
; X86: ## %bb.0:
; X86-NEXT: vpmovzxwd %ymm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x33,0xd0]
; X86-NEXT: ## zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovzxwd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x33,0xc8]
; X86-NEXT: ## zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; X86-NEXT: vpmovzxwd %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x33,0xc0]
; X86-NEXT: ## zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; X86-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_d_512:
; X64: ## %bb.0:
; X64-NEXT: vpmovzxwd %ymm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x33,0xd0]
; X64-NEXT: ## zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovzxwd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x33,0xc8]
; X64-NEXT: ## zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; X64-NEXT: vpmovzxwd %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x33,0xc0]
; X64-NEXT: ## zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; X64-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> zeroinitializer, i16 %x2)
%res2 = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 -1)
%res3 = add <16 x i32> %res, %res1
%res4 = add <16 x i32> %res3, %res2
ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pmovzxw_q_512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_q_512:
; X86: ## %bb.0:
; X86-NEXT: vpmovzxwq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x34,0xd0]
; X86-NEXT: ## zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovzxwq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x34,0xc8]
; X86-NEXT: ## zmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X86-NEXT: vpmovzxwq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x34,0xc0]
; X86-NEXT: ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_q_512:
; X64: ## %bb.0:
; X64-NEXT: vpmovzxwq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x34,0xd0]
; X64-NEXT: ## zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovzxwq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x34,0xc8]
; X64-NEXT: ## zmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X64-NEXT: vpmovzxwq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x34,0xc0]
; X64-NEXT: ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> zeroinitializer, i8 %x2)
%res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 -1)
%res3 = add <8 x i64> %res, %res1
%res4 = add <8 x i64> %res3, %res2
ret <8 x i64> %res4
}
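
; Masked sign-extension tests, following the same three-call pattern as the
; zero-extension tests above and lowering to the VPMOVSX* instructions.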

declare <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_pmovsxb_d_512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_d_512:
; X86: ## %bb.0:
; X86-NEXT: vpmovsxbd %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x21,0xd0]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovsxbd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x21,0xc8]
; X86-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x21,0xc0]
; X86-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_d_512:
; X64: ## %bb.0:
; X64-NEXT: vpmovsxbd %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x21,0xd0]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxbd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x21,0xc8]
; X64-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x21,0xc0]
; X64-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %x0, <16 x i32> zeroinitializer, i16 %x2)
%res2 = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 -1)
%res3 = add <16 x i32> %res, %res1
%res4 = add <16 x i32> %res3, %res2
ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pmovsxb_q_512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_q_512:
; X86: ## %bb.0:
; X86-NEXT: vpmovsxbq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x22,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsxbq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x22,0xc8]
; X86-NEXT: vpmovsxbq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x22,0xc0]
; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_q_512:
; X64: ## %bb.0:
; X64-NEXT: vpmovsxbq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x22,0xd0]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxbq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x22,0xc8]
; X64-NEXT: vpmovsxbq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x22,0xc0]
; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8> %x0, <8 x i64> zeroinitializer, i8 %x2)
%res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 -1)
%res3 = add <8 x i64> %res, %res1
%res4 = add <8 x i64> %res3, %res2
ret <8 x i64> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pmovsxd_q_512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxd_q_512:
; X86: ## %bb.0:
; X86-NEXT: vpmovsxdq %ymm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x25,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsxdq %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x25,0xc8]
; X86-NEXT: vpmovsxdq %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x25,0xc0]
; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxd_q_512:
; X64: ## %bb.0:
; X64-NEXT: vpmovsxdq %ymm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x25,0xd0]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxdq %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x25,0xc8]
; X64-NEXT: vpmovsxdq %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x25,0xc0]
; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32> %x0, <8 x i64> zeroinitializer, i8 %x2)
%res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 -1)
%res3 = add <8 x i64> %res, %res1
%res4 = add <8 x i64> %res3, %res2
ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_pmovsxw_d_512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_d_512:
; X86: ## %bb.0:
; X86-NEXT: vpmovsxwd %ymm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xd0]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovsxwd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x23,0xc8]
; X86-NEXT: vpmovsxwd %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x23,0xc0]
; X86-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_d_512:
; X64: ## %bb.0:
; X64-NEXT: vpmovsxwd %ymm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xd0]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxwd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x23,0xc8]
; X64-NEXT: vpmovsxwd %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x23,0xc0]
; X64-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16> %x0, <16 x i32> zeroinitializer, i16 %x2)
%res2 = call <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 -1)
%res3 = add <16 x i32> %res, %res1
%res4 = add <16 x i32> %res3, %res2
ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pmovsxw_q_512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_q_512:
; X86: ## %bb.0:
; X86-NEXT: vpmovsxwq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x24,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsxwq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x24,0xc8]
; X86-NEXT: vpmovsxwq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x24,0xc0]
; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_q_512:
; X64: ## %bb.0:
; X64-NEXT: vpmovsxwq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x24,0xd0]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxwq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x24,0xc8]
; X64-NEXT: vpmovsxwq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x24,0xc0]
; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16> %x0, <8 x i64> zeroinitializer, i8 %x2)
%res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 -1)
%res3 = add <8 x i64> %res, %res1
%res4 = add <8 x i64> %res3, %res2
ret <8 x i64> %res4
}
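
; Variable-count rotate tests. The prolv/prorv intrinsics are unmasked; the IR
; expresses masking as a bitcast of the scalar mask to <N x i1> followed by a
; select, which should still fold into the {%k1} and {%k1} {z} forms of
; VPROLVD/VPROLVQ/VPRORVD/VPRORVQ.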

declare <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32>, <16 x i32>)

define <16 x i32>@test_int_x86_avx512_prolv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_prolv_d_512:
; X86: ## %bb.0:
; X86-NEXT: vprolvd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x15,0xd9]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vprolvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x15,0xd1]
; X86-NEXT: vprolvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x15,0xc1]
; X86-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_prolv_d_512:
; X64: ## %bb.0:
; X64-NEXT: vprolvd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x15,0xd9]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprolvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x15,0xd1]
; X64-NEXT: vprolvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x15,0xc1]
; X64-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%1 = call <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1)
%2 = bitcast i16 %x3 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
%4 = call <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1)
%5 = bitcast i16 %x3 to <16 x i1>
%6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
%7 = call <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1)
%res3 = add <16 x i32> %3, %6
%res4 = add <16 x i32> %res3, %7
ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64>, <8 x i64>)

define <8 x i64>@test_int_x86_avx512_prolv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_prolv_q_512:
; X86: ## %bb.0:
; X86-NEXT: vprolvq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x15,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprolvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x15,0xd1]
; X86-NEXT: vprolvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x15,0xc1]
; X86-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_prolv_q_512:
; X64: ## %bb.0:
; X64-NEXT: vprolvq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x15,0xd9]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprolvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x15,0xd1]
; X64-NEXT: vprolvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x15,0xc1]
; X64-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%1 = call <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2
%4 = call <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1)
%5 = bitcast i8 %x3 to <8 x i1>
%6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
%7 = call <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1)
%res3 = add <8 x i64> %3, %6
%res4 = add <8 x i64> %res3, %7
ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32>, <16 x i32>)

define <16 x i32>@test_int_x86_avx512_prorv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_prorv_d_512:
; X86: ## %bb.0:
; X86-NEXT: vprorvd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x14,0xd9]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vprorvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x14,0xd1]
; X86-NEXT: vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x14,0xc1]
; X86-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_prorv_d_512:
; X64: ## %bb.0:
; X64-NEXT: vprorvd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x14,0xd9]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprorvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x14,0xd1]
; X64-NEXT: vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x14,0xc1]
; X64-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%1 = call <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1)
%2 = bitcast i16 %x3 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
%4 = call <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1)
%5 = bitcast i16 %x3 to <16 x i1>
%6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
%7 = call <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1)
%res3 = add <16 x i32> %3, %6
%res4 = add <16 x i32> %res3, %7
ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64>, <8 x i64>)

define <8 x i64>@test_int_x86_avx512_prorv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_prorv_q_512:
; X86: ## %bb.0:
; X86-NEXT: vprorvq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x14,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprorvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x14,0xd1]
; X86-NEXT: vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x14,0xc1]
; X86-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_prorv_q_512:
; X64: ## %bb.0:
; X64-NEXT: vprorvq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x14,0xd9]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprorvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x14,0xd1]
; X64-NEXT: vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x14,0xc1]
; X64-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%1 = call <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2
%4 = call <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1)
%5 = bitcast i8 %x3 to <8 x i1>
%6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
%7 = call <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1)
%res3 = add <8 x i64> %3, %6
%res4 = add <8 x i64> %res3, %7
ret <8 x i64> %res4
}
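
; Rotate-by-immediate variants: the same select-based masking pattern, with an
; immediate rotate amount, lowering to VPROLD/VPROLQ/VPRORD/VPRORQ $3.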

declare <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32>, i32)

define <16 x i32>@test_int_x86_avx512_prol_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_prol_d_512:
; X86: ## %bb.0:
; X86-NEXT: vprold $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc8,0x03]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vprold $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc8,0x03]
; X86-NEXT: vprold $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xc8,0x03]
; X86-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_prol_d_512:
; X64: ## %bb.0:
; X64-NEXT: vprold $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc8,0x03]
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprold $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc8,0x03]
; X64-NEXT: vprold $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xc8,0x03]
; X64-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%1 = call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %x0, i32 3)
%2 = bitcast i16 %x3 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
%4 = call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %x0, i32 3)
%5 = bitcast i16 %x3 to <16 x i1>
%6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
%7 = call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %x0, i32 3)
%res3 = add <16 x i32> %3, %6
%res4 = add <16 x i32> %res3, %7
ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64>, i32)

define <8 x i64>@test_int_x86_avx512_prol_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_prol_q_512:
; X86: ## %bb.0:
; X86-NEXT: vprolq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc8,0x03]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprolq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc8,0x03]
; X86-NEXT: vprolq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xc8,0x03]
; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_prol_q_512:
; X64: ## %bb.0:
; X64-NEXT: vprolq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc8,0x03]
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprolq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc8,0x03]
; X64-NEXT: vprolq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xc8,0x03]
; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%1 = call <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64> %x0, i32 3)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2
%4 = call <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64> %x0, i32 3)
%5 = bitcast i8 %x3 to <8 x i1>
%6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
%7 = call <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64> %x0, i32 3)
%res3 = add <8 x i64> %3, %6
%res4 = add <8 x i64> %res3, %7
ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32>, i32)

define <16 x i32>@test_int_x86_avx512_pror_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_pror_d_512:
; X86: ## %bb.0:
; X86-NEXT: vprord $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc0,0x03]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vprord $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc0,0x03]
; X86-NEXT: vprord $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xc0,0x03]
; X86-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pror_d_512:
; X64: ## %bb.0:
; X64-NEXT: vprord $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc0,0x03]
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprord $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc0,0x03]
; X64-NEXT: vprord $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xc0,0x03]
; X64-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%1 = call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %x0, i32 3)
%2 = bitcast i16 %x3 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
%4 = call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %x0, i32 3)
%5 = bitcast i16 %x3 to <16 x i1>
%6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
%7 = call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %x0, i32 3)
%res3 = add <16 x i32> %3, %6
%res4 = add <16 x i32> %res3, %7
ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64>, i32)

define <8 x i64>@test_int_x86_avx512_pror_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_pror_q_512:
; X86: ## %bb.0:
; X86-NEXT: vprorq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc0,0x03]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprorq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc0,0x03]
; X86-NEXT: vprorq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xc0,0x03]
; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pror_q_512:
; X64: ## %bb.0:
; X64-NEXT: vprorq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc0,0x03]
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprorq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc0,0x03]
; X64-NEXT: vprorq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xc0,0x03]
; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%1 = call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %x0, i32 3)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2
%4 = call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %x0, i32 3)
%5 = bitcast i8 %x3 to <8 x i1>
%6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
%7 = call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %x0, i32 3)
%res3 = add <8 x i64> %3, %6
%res4 = add <8 x i64> %res3, %7
ret <8 x i64> %res4
}
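
; Shift-by-immediate tests. Unlike the rotates above, these still use the
; mask.psrl/psra/psll .qi/.di intrinsic forms, which carry the passthru vector
; and the mask as explicit operands.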

declare <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_psrl_qi_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_qi_512:
; X86: ## %bb.0:
; X86-NEXT: vpsrlq $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xd0,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsrlq $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xd0,0x04]
; X86-NEXT: vpsrlq $4, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xd0,0x04]
; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_qi_512:
; X64: ## %bb.0:
; X64-NEXT: vpsrlq $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xd0,0x04]
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsrlq $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xd0,0x04]
; X64-NEXT: vpsrlq $4, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xd0,0x04]
; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 4, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 4, <8 x i64> %x2, i8 -1)
%res2 = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 4, <8 x i64> zeroinitializer, i8 %x3)
%res3 = add <8 x i64> %res, %res1
%res4 = add <8 x i64> %res3, %res2
ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_psrl_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_di_512:
; X86: ## %bb.0:
; X86-NEXT: vpsrld $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xd0,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsrld $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0x04]
; X86-NEXT: vpsrld $4, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xd0,0x04]
; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_di_512:
; X64: ## %bb.0:
; X64-NEXT: vpsrld $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xd0,0x04]
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsrld $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0x04]
; X64-NEXT: vpsrld $4, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xd0,0x04]
; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 4, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 4, <16 x i32> %x2, i16 -1)
%res2 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 4, <16 x i32> zeroinitializer, i16 %x3)
%res3 = add <16 x i32> %res, %res1
%res4 = add <16 x i32> %res3, %res2
ret <16 x i32> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_psra_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_di_512:
; X86: ## %bb.0:
; X86-NEXT: vpsrad $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xe0,0x03]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsrad $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xe0,0x03]
; X86-NEXT: vpsrad $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xe0,0x03]
; X86-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_di_512:
; X64: ## %bb.0:
; X64-NEXT: vpsrad $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xe0,0x03]
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsrad $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xe0,0x03]
; X64-NEXT: vpsrad $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xe0,0x03]
; X64-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i32 3, <16 x i32> zeroinitializer, i16 %x3)
%res2 = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 -1)
%res3 = add <16 x i32> %res, %res1
%res4 = add <16 x i32> %res3, %res2
ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_psra_qi_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_qi_512:
; X86: ## %bb.0:
; X86-NEXT: vpsraq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xe0,0x03]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsraq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xe0,0x03]
; X86-NEXT: vpsraq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xe0,0x03]
; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_qi_512:
; X64: ## %bb.0:
; X64-NEXT: vpsraq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xe0,0x03]
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsraq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xe0,0x03]
; X64-NEXT: vpsraq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xe0,0x03]
; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3)
%res2 = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1)
%res3 = add <8 x i64> %res, %res1
%res4 = add <8 x i64> %res3, %res2
ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_psll_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psll_di_512:
; X86: ## %bb.0:
; X86-NEXT: vpslld $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xf0,0x03]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpslld $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xf0,0x03]
; X86-NEXT: vpslld $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xf0,0x03]
; X86-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psll_di_512:
; X64: ## %bb.0:
; X64-NEXT: vpslld $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xf0,0x03]
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpslld $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xf0,0x03]
; X64-NEXT: vpslld $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xf0,0x03]
; X64-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i32 3, <16 x i32> zeroinitializer, i16 %x3)
%res2 = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 -1)
%res3 = add <16 x i32> %res, %res1
%res4 = add <16 x i32> %res3, %res2
ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_psll_qi_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psll_qi_512:
; X86: ## %bb.0:
; X86-NEXT: vpsllq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xf0,0x03]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsllq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xf0,0x03]
; X86-NEXT: vpsllq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xf0,0x03]
; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psll_qi_512:
; X64: ## %bb.0:
; X64-NEXT: vpsllq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xf0,0x03]
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsllq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xf0,0x03]
; X64-NEXT: vpsllq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xf0,0x03]
; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3)
%res2 = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1)
%res3 = add <8 x i64> %res, %res1
%res4 = add <8 x i64> %res3, %res2
ret <8 x i64> %res4
}
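
; Shift-by-vector-count tests: the count comes from the low element of an XMM
; register, with unmasked, merge-masked and zero-masked variants checked
; separately.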
define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psll_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf2,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psll_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpslld %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf2,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psll_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpslld %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf2,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
}
3923 define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
3924 ; X86-LABEL: test_x86_avx512_maskz_psll_d:
3926 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3927 ; X86-NEXT: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf2,0xc1]
3928 ; X86-NEXT: retl ## encoding: [0xc3]
3930 ; X64-LABEL: test_x86_avx512_maskz_psll_d:
3932 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3933 ; X64-NEXT: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf2,0xc1]
3934 ; X64-NEXT: retq ## encoding: [0xc3]
3935 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
3939 declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
3941 define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
3942 ; CHECK-LABEL: test_x86_avx512_psll_q:
3944 ; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf3,0xc1]
3945 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
3946 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
3950 define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
3951 ; X86-LABEL: test_x86_avx512_mask_psll_q:
3953 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
3954 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
3955 ; X86-NEXT: vpsllq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf3,0xd1]
3956 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3957 ; X86-NEXT: retl ## encoding: [0xc3]
3959 ; X64-LABEL: test_x86_avx512_mask_psll_q:
3961 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3962 ; X64-NEXT: vpsllq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf3,0xd1]
3963 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3964 ; X64-NEXT: retq ## encoding: [0xc3]
3965 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
3969 define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
3970 ; X86-LABEL: test_x86_avx512_maskz_psll_q:
3972 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
3973 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
3974 ; X86-NEXT: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf3,0xc1]
3975 ; X86-NEXT: retl ## encoding: [0xc3]
3977 ; X64-LABEL: test_x86_avx512_maskz_psll_q:
3979 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3980 ; X64-NEXT: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf3,0xc1]
3981 ; X64-NEXT: retq ## encoding: [0xc3]
3982 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
3986 declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
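
; Note: the shift tests above and below follow one pattern: the unmasked form
; (mask -1) selects the plain instruction, merge-masking shifts into
; %zmm2 {%k1} and copies the result back with vmovdqa64, and zero-masking
; uses {%k1} {z}. On X86 an i8 mask is loaded with movzbl+kmovw, while an
; i16 mask is loaded from the stack with kmovw directly.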

define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psrl_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd2,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrl_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrld %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd2,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrl_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrld %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd2,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrl_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd2,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrl_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd2,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psrl_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd3,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrl_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsrlq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd3,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrl_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrlq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd3,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrl_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd3,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrl_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd3,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psra_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xe2,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psra_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrad %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xe2,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psra_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrad %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xe2,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psra_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xe2,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psra_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xe2,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psra_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xe2,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psra_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsraq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xe2,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psra_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsraq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xe2,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psra_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xe2,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psra_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xe2,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psllv_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x47,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psllv_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsllvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x47,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psllv_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsllvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x47,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psllv_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x47,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psllv_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x47,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psllv_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x47,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psllv_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsllvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x47,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psllv_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsllvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x47,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psllv_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x47,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psllv_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x47,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psrav_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x46,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrav_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsravd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x46,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrav_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsravd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x46,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrav_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x46,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrav_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x46,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psrav_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x46,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrav_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsravq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x46,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrav_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsravq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x46,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrav_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x46,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrav_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x46,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psrlv_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x45,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrlv_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x45,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrlv_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x45,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrlv_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x45,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrlv_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x45,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psrlv_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x45,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrlv_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x45,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrlv_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x45,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrlv_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x45,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrlv_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x45,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
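
; The memop test below checks that a shift-count vector loaded from memory is
; folded into the variable-shift instruction's memory operand ((%rdi) on X64)
; instead of being loaded into a register first.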

define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
; X86-LABEL: test_x86_avx512_psrlv_q_memop:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpsrlvq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x45,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_psrlv_q_memop:
; X64: ## %bb.0:
; X64-NEXT: vpsrlvq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x45,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <8 x i64>, <8 x i64>* %ptr
%res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512:
; X86: ## %bb.0:
; X86-NEXT: vcvtdq2pd %ymm0, %zmm2 ## encoding: [0x62,0xf1,0x7e,0x48,0xe6,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vcvtdq2pd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0xe6,0xc8]
; X86-NEXT: vaddpd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512:
; X64: ## %bb.0:
; X64-NEXT: vcvtdq2pd %ymm0, %zmm2 ## encoding: [0x62,0xf1,0x7e,0x48,0xe6,0xd0]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtdq2pd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0xe6,0xc8]
; X64-NEXT: vaddpd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
%res1 = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
%res2 = fadd <8 x double> %res, %res1
ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512:
; X86: ## %bb.0:
; X86-NEXT: vcvtudq2pd %ymm0, %zmm2 ## encoding: [0x62,0xf1,0x7e,0x48,0x7a,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vcvtudq2pd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x7a,0xc8]
; X86-NEXT: vaddpd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512:
; X64: ## %bb.0:
; X64-NEXT: vcvtudq2pd %ymm0, %zmm2 ## encoding: [0x62,0xf1,0x7e,0x48,0x7a,0xd0]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtudq2pd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x7a,0xc8]
; X64-NEXT: vaddpd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
%res1 = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
%res2 = fadd <8 x double> %res, %res1
ret <8 x double> %res2
}
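
; The valign tests below rotate the concatenation of the two sources by the
; immediate; the expected lane mapping is spelled out in the shuffle decode
; comments attached to each valignq/valignd instruction.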

define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_valign_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: valignq $2, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x03,0xc1,0x02]
; CHECK-NEXT: ## zmm0 = zmm1[2,3,4,5,6,7],zmm0[0,1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
; X86-LABEL: test_mask_valign_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: valignq $2, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x03,0xd1,0x02]
; X86-NEXT: ## zmm2 {%k1} = zmm1[2,3,4,5,6,7],zmm0[0,1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_valign_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: valignq $2, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x03,0xd1,0x02]
; X64-NEXT: ## zmm2 {%k1} = zmm1[2,3,4,5,6,7],zmm0[0,1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> %src, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)

define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; X86-LABEL: test_maskz_valign_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
; X86-NEXT: ## zmm0 {%k1} {z} = zmm1[5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1,2,3,4]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_valign_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
; X64-NEXT: ## zmm0 {%k1} {z} = zmm1[5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1,2,3,4]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i32 5, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)

declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512:
; X86: ## %bb.0:
; X86-NEXT: vpermilpd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x0d,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermilpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x0d,0xd1]
; X86-NEXT: vpermilpd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x0d,0xc1]
; X86-NEXT: vaddpd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
; X86-NEXT: vaddpd %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0xe5,0x48,0x58,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512:
; X64: ## %bb.0:
; X64-NEXT: vpermilpd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x0d,0xd9]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermilpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x0d,0xd1]
; X64-NEXT: vpermilpd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x0d,0xc1]
; X64-NEXT: vaddpd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
; X64-NEXT: vaddpd %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0xe5,0x48,0x58,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
%res1 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
%res2 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
%res3 = fadd <8 x double> %res, %res1
%res4 = fadd <8 x double> %res2, %res3
ret <8 x double> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512:
; X86: ## %bb.0:
; X86-NEXT: vpermilps %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x0c,0xd9]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermilps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0xd1]
; X86-NEXT: vpermilps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0xc1]
; X86-NEXT: vaddps %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
; X86-NEXT: vaddps %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0x64,0x48,0x58,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512:
; X64: ## %bb.0:
; X64-NEXT: vpermilps %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x0c,0xd9]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermilps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0xd1]
; X64-NEXT: vpermilps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0xc1]
; X64-NEXT: vaddps %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
; X64-NEXT: vaddps %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0x64,0x48,0x58,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
%res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
%res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
%res3 = fadd <16 x float> %res, %res1
%res4 = fadd <16 x float> %res2, %res3
ret <16 x float> %res4
}

; Test case to make sure we can print shuffle decode comments for constant pool loads.
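; Each vpermilps below takes its index vector from a constant pool (the
; LCPI211_* fixups), so the assembler output is expected to carry a decoded
; shuffle comment such as zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12].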
define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermilps {{.*#+}} zmm2 {%k1} = zmm0[2,3,0,1,7,6,5,4,9,8,11,10,12,13,14,15]
; X86-NEXT: ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0x15,A,A,A,A]
; X86-NEXT: ## fixup A - offset: 6, value: LCPI211_0, kind: FK_Data_4
; X86-NEXT: vpermilps {{.*#+}} zmm1 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
; X86-NEXT: ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0x0d,A,A,A,A]
; X86-NEXT: ## fixup A - offset: 6, value: LCPI211_1, kind: FK_Data_4
; X86-NEXT: vaddps %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9]
; X86-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
; X86-NEXT: ## encoding: [0x62,0xf2,0x7d,0x48,0x0c,0x05,A,A,A,A]
; X86-NEXT: ## fixup A - offset: 6, value: LCPI211_2, kind: FK_Data_4
; X86-NEXT: vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermilps {{.*#+}} zmm2 {%k1} = zmm0[2,3,0,1,7,6,5,4,9,8,11,10,12,13,14,15]
; X64-NEXT: ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0x15,A,A,A,A]
; X64-NEXT: ## fixup A - offset: 6, value: LCPI211_0-4, kind: reloc_riprel_4byte
; X64-NEXT: vpermilps {{.*#+}} zmm1 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
; X64-NEXT: ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0x0d,A,A,A,A]
; X64-NEXT: ## fixup A - offset: 6, value: LCPI211_1-4, kind: reloc_riprel_4byte
; X64-NEXT: vaddps %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9]
; X64-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
; X64-NEXT: ## encoding: [0x62,0xf2,0x7d,0x48,0x0c,0x05,A,A,A,A]
; X64-NEXT: ## fixup A - offset: 6, value: LCPI211_2-4, kind: reloc_riprel_4byte
; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3>, <16 x float> %x2, i16 %x3)
%res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3>, <16 x float> zeroinitializer, i16 %x3)
%res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>, <16 x float> %x2, i16 -1)
%res3 = fadd <16 x float> %res, %res1
%res4 = fadd <16 x float> %res2, %res3
ret <16 x float> %res4
}
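
; The pmuldq/pmuludq tests below cover register (rr), memory (rm), and
; broadcast (rmb) operands, each with merge (k) and zero (kz) masking. On X64
; the rmb forms should fold the scalar load via the {1to8} embedded broadcast;
; on X86 a separate vpbroadcastd is emitted instead.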
4670 define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
4671 ; CHECK-LABEL: test_mask_mul_epi32_rr:
4673 ; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
4674 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
4675 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
4679 define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
4680 ; X86-LABEL: test_mask_mul_epi32_rrk:
4682 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
4683 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
4684 ; X86-NEXT: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
4685 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4686 ; X86-NEXT: retl ## encoding: [0xc3]
4688 ; X64-LABEL: test_mask_mul_epi32_rrk:
4690 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4691 ; X64-NEXT: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
4692 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4693 ; X64-NEXT: retq ## encoding: [0xc3]
4694 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
4698 define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
4699 ; X86-LABEL: test_mask_mul_epi32_rrkz:
4701 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
4702 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
4703 ; X86-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
4704 ; X86-NEXT: retl ## encoding: [0xc3]
4706 ; X64-LABEL: test_mask_mul_epi32_rrkz:
4708 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4709 ; X64-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
4710 ; X64-NEXT: retq ## encoding: [0xc3]
4711 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
4715 define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
4716 ; X86-LABEL: test_mask_mul_epi32_rm:
4718 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
4719 ; X86-NEXT: vpmuldq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x00]
4720 ; X86-NEXT: retl ## encoding: [0xc3]
4722 ; X64-LABEL: test_mask_mul_epi32_rm:
4724 ; X64-NEXT: vpmuldq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x07]
4725 ; X64-NEXT: retq ## encoding: [0xc3]
4726 %b = load <16 x i32>, <16 x i32>* %ptr_b
4727 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
4731 define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
4732 ; X86-LABEL: test_mask_mul_epi32_rmk:
4734 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
4735 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4736 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
4737 ; X86-NEXT: vpmuldq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x08]
4738 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
4739 ; X86-NEXT: retl ## encoding: [0xc3]
4741 ; X64-LABEL: test_mask_mul_epi32_rmk:
4743 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
4744 ; X64-NEXT: vpmuldq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x0f]
4745 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
4746 ; X64-NEXT: retq ## encoding: [0xc3]
4747 %b = load <16 x i32>, <16 x i32>* %ptr_b
4748 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
4752 define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
4753 ; X86-LABEL: test_mask_mul_epi32_rmkz:
4755 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
4756 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4757 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
4758 ; X86-NEXT: vpmuldq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x00]
4759 ; X86-NEXT: retl ## encoding: [0xc3]
4761 ; X64-LABEL: test_mask_mul_epi32_rmkz:
4763 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
4764 ; X64-NEXT: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x07]
4765 ; X64-NEXT: retq ## encoding: [0xc3]
4766 %b = load <16 x i32>, <16 x i32>* %ptr_b
4767 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
4771 define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
4772 ; X86-LABEL: test_mask_mul_epi32_rmb:
4774 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
4775 ; X86-NEXT: vpbroadcastd (%eax), %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0x08]
4776 ; X86-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
4777 ; X86-NEXT: retl ## encoding: [0xc3]
4779 ; X64-LABEL: test_mask_mul_epi32_rmb:
4781 ; X64-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x07]
4782 ; X64-NEXT: retq ## encoding: [0xc3]
4783 %q = load i64, i64* %ptr_b
4784 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
4785 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
4786 %b = bitcast <8 x i64> %b64 to <16 x i32>
4787 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
4791 define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
4792 ; X86-LABEL: test_mask_mul_epi32_rmbk:
4794 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
4795 ; X86-NEXT: vpbroadcastd (%eax), %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0x10]
4796 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
4797 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
4798 ; X86-NEXT: vpmuldq %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xca]
4799 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
4800 ; X86-NEXT: retl ## encoding: [0xc3]
4802 ; X64-LABEL: test_mask_mul_epi32_rmbk:
4804 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
4805 ; X64-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x0f]
4806 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
4807 ; X64-NEXT: retq ## encoding: [0xc3]
4808 %q = load i64, i64* %ptr_b
4809 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
4810 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
4811 %b = bitcast <8 x i64> %b64 to <16 x i32>
4812 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
4816 define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
4817 ; X86-LABEL: test_mask_mul_epi32_rmbkz:
4819 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
4820 ; X86-NEXT: vpbroadcastd (%eax), %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0x08]
4821 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
4822 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
4823 ; X86-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
4824 ; X86-NEXT: retl ## encoding: [0xc3]
4826 ; X64-LABEL: test_mask_mul_epi32_rmbkz:
4828 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
4829 ; X64-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x07]
4830 ; X64-NEXT: retq ## encoding: [0xc3]
4831 %q = load i64, i64* %ptr_b
4832 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
4833 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
4834 %b = bitcast <8 x i64> %b64 to <16 x i32>
4835 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
4839 declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
4841 define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
4842 ; CHECK-LABEL: test_mask_mul_epu32_rr:
4844 ; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
4845 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
4846 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
4850 define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
4851 ; X86-LABEL: test_mask_mul_epu32_rrk:
4853 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
4854 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
4855 ; X86-NEXT: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
4856 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4857 ; X86-NEXT: retl ## encoding: [0xc3]
4859 ; X64-LABEL: test_mask_mul_epu32_rrk:
4861 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4862 ; X64-NEXT: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
4863 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4864 ; X64-NEXT: retq ## encoding: [0xc3]
4865 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
4869 define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
4870 ; X86-LABEL: test_mask_mul_epu32_rrkz:
4872 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
4873 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
4874 ; X86-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
4875 ; X86-NEXT: retl ## encoding: [0xc3]
4877 ; X64-LABEL: test_mask_mul_epu32_rrkz:
4879 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
4880 ; X64-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
4881 ; X64-NEXT: retq ## encoding: [0xc3]
4882 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
4886 define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
4887 ; X86-LABEL: test_mask_mul_epu32_rm:
4889 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
4890 ; X86-NEXT: vpmuludq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x00]
4891 ; X86-NEXT: retl ## encoding: [0xc3]
4893 ; X64-LABEL: test_mask_mul_epu32_rm:
4895 ; X64-NEXT: vpmuludq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x07]
4896 ; X64-NEXT: retq ## encoding: [0xc3]
4897 %b = load <16 x i32>, <16 x i32>* %ptr_b
4898 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
4902 define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
4903 ; X86-LABEL: test_mask_mul_epu32_rmk:
4905 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
4906 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4907 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
4908 ; X86-NEXT: vpmuludq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x08]
4909 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
4910 ; X86-NEXT: retl ## encoding: [0xc3]
4912 ; X64-LABEL: test_mask_mul_epu32_rmk:
4914 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
4915 ; X64-NEXT: vpmuludq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x0f]
4916 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
4917 ; X64-NEXT: retq ## encoding: [0xc3]
4918 %b = load <16 x i32>, <16 x i32>* %ptr_b
4919 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
4923 define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
4924 ; X86-LABEL: test_mask_mul_epu32_rmkz:
4926 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
4927 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4928 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
4929 ; X86-NEXT: vpmuludq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x00]
4930 ; X86-NEXT: retl ## encoding: [0xc3]
4932 ; X64-LABEL: test_mask_mul_epu32_rmkz:
4934 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
4935 ; X64-NEXT: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x07]
4936 ; X64-NEXT: retq ## encoding: [0xc3]
4937 %b = load <16 x i32>, <16 x i32>* %ptr_b
4938 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
4942 define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
4943 ; X86-LABEL: test_mask_mul_epu32_rmb:
4945 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
4946 ; X86-NEXT: vpbroadcastd (%eax), %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0x08]
4947 ; X86-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
4948 ; X86-NEXT: retl ## encoding: [0xc3]
4950 ; X64-LABEL: test_mask_mul_epu32_rmb:
4952 ; X64-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x07]
4953 ; X64-NEXT: retq ## encoding: [0xc3]
4954 %q = load i64, i64* %ptr_b
4955 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
4956 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
4957 %b = bitcast <8 x i64> %b64 to <16 x i32>
4958 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
4962 define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
4963 ; X86-LABEL: test_mask_mul_epu32_rmbk:
4965 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
4966 ; X86-NEXT: vpbroadcastd (%eax), %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0x10]
4967 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
4968 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
4969 ; X86-NEXT: vpmuludq %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xca]
4970 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
4971 ; X86-NEXT: retl ## encoding: [0xc3]
4973 ; X64-LABEL: test_mask_mul_epu32_rmbk:
4975 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
4976 ; X64-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x0f]
4977 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
4978 ; X64-NEXT: retq ## encoding: [0xc3]
4979 %q = load i64, i64* %ptr_b
4980 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
4981 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
4982 %b = bitcast <8 x i64> %b64 to <16 x i32>
4983 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
4987 define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
4988 ; X86-LABEL: test_mask_mul_epu32_rmbkz:
4990 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
4991 ; X86-NEXT: vpbroadcastd (%eax), %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0x08]
4992 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
4993 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
4994 ; X86-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
4995 ; X86-NEXT: retl ## encoding: [0xc3]
4997 ; X64-LABEL: test_mask_mul_epu32_rmbkz:
4999 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
5000 ; X64-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x07]
5001 ; X64-NEXT: retq ## encoding: [0xc3]
5002 %q = load i64, i64* %ptr_b
5003 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
5004 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
5005 %b = bitcast <8 x i64> %b64 to <16 x i32>
5006 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
5007 ret <8 x i64> %res
5008 }
5010 declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
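; The vextract tests below cover both masked forms of the extract intrinsics:
; merging into the passthrough operand %b and zeroing with {z}. On both
; targets the scalar mask is first moved into %k1 with kmovw, and the 128-bit
; extracts end with vzeroupper because the result is returned in xmm0.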
5012 define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
5013 ; X86-LABEL: test_mask_vextractf32x4:
5014 ; X86: ## %bb.0:
5015 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
5016 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
5017 ; X86-NEXT: vextractf32x4 $2, %zmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x19,0xc8,0x02]
5018 ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
5019 ; X86-NEXT: retl ## encoding: [0xc3]
5020 ;
5021 ; X64-LABEL: test_mask_vextractf32x4:
5022 ; X64: ## %bb.0:
5023 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5024 ; X64-NEXT: vextractf32x4 $2, %zmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x19,0xc8,0x02]
5025 ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
5026 ; X64-NEXT: retq ## encoding: [0xc3]
5027 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i32 2, <4 x float> %b, i8 %mask)
5028 ret <4 x float> %res
5029 }
5031 declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i32, <4 x float>, i8)
5033 define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
5034 ; X86-LABEL: test_mask_vextracti64x4:
5035 ; X86: ## %bb.0:
5036 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
5037 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
5038 ; X86-NEXT: vextracti64x4 $1, %zmm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3b,0xc8,0x01]
5039 ; X86-NEXT: retl ## encoding: [0xc3]
5040 ;
5041 ; X64-LABEL: test_mask_vextracti64x4:
5042 ; X64: ## %bb.0:
5043 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5044 ; X64-NEXT: vextracti64x4 $1, %zmm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3b,0xc8,0x01]
5045 ; X64-NEXT: retq ## encoding: [0xc3]
5046 %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i32 1, <4 x i64> %b, i8 %mask)
5047 ret <4 x i64> %res
5048 }
5050 declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i32, <4 x i64>, i8)
5052 define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
5053 ; X86-LABEL: test_maskz_vextracti32x4:
5054 ; X86: ## %bb.0:
5055 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
5056 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
5057 ; X86-NEXT: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x39,0xc0,0x02]
5058 ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
5059 ; X86-NEXT: retl ## encoding: [0xc3]
5060 ;
5061 ; X64-LABEL: test_maskz_vextracti32x4:
5062 ; X64: ## %bb.0:
5063 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5064 ; X64-NEXT: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x39,0xc0,0x02]
5065 ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
5066 ; X64-NEXT: retq ## encoding: [0xc3]
5067 %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i32 2, <4 x i32> zeroinitializer, i8 %mask)
5068 ret <4 x i32> %res
5069 }
5071 declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i32, <4 x i32>, i8)
5073 define <4 x double> @test_vextractf64x4(<8 x double> %a) {
5074 ; CHECK-LABEL: test_vextractf64x4:
5075 ; CHECK: ## %bb.0:
5076 ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1b,0xc0,0x01]
5077 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
5078 %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i32 1, <4 x double> zeroinitializer, i8 -1)
5079 ret <4 x double> %res
5080 }
5082 declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i32, <4 x double>, i8)
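; Each insert test below calls the same intrinsic three times -- merge-masked
; into the passthrough, unmasked (mask = -1), and zeroing-masked -- then
; reduces the results with add/fadd so that none of the calls can be folded
; away and a single FileCheck body covers all three lowerings.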
5084 declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i16)
5086 define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3, i16 %x4) {
5087 ; X86-LABEL: test_int_x86_avx512_mask_insertf32x4_512:
5088 ; X86: ## %bb.0:
5089 ; X86-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0x7d,0x48,0x18,0xd9,0x01]
5090 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
5091 ; X86-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x18,0xd1,0x01]
5092 ; X86-NEXT: vaddps %zmm3, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xd3]
5093 ; X86-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x18,0xc1,0x01]
5094 ; X86-NEXT: vaddps %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc2]
5095 ; X86-NEXT: retl ## encoding: [0xc3]
5096 ;
5097 ; X64-LABEL: test_int_x86_avx512_mask_insertf32x4_512:
5098 ; X64: ## %bb.0:
5099 ; X64-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0x7d,0x48,0x18,0xd9,0x01]
5100 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5101 ; X64-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x18,0xd1,0x01]
5102 ; X64-NEXT: vaddps %zmm3, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xd3]
5103 ; X64-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x18,0xc1,0x01]
5104 ; X64-NEXT: vaddps %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc2]
5105 ; X64-NEXT: retq ## encoding: [0xc3]
5106 %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i16 %x4)
5107 %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i16 -1)
5108 %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> zeroinitializer, i16 %x4)
5109 %res3 = fadd <16 x float> %res, %res1
5110 %res4 = fadd <16 x float> %res2, %res3
5111 ret <16 x float> %res4
5112 }
5114 declare <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32>, <4 x i32>, i32, <16 x i32>, i16)
5116 define <16 x i32>@test_int_x86_avx512_mask_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, <16 x i32> %x3, i16 %x4) {
5117 ; X86-LABEL: test_int_x86_avx512_mask_inserti32x4_512:
5118 ; X86: ## %bb.0:
5119 ; X86-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0x7d,0x48,0x38,0xd9,0x01]
5120 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
5121 ; X86-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x38,0xd1,0x01]
5122 ; X86-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x38,0xc1,0x01]
5123 ; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
5124 ; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
5125 ; X86-NEXT: retl ## encoding: [0xc3]
5126 ;
5127 ; X64-LABEL: test_int_x86_avx512_mask_inserti32x4_512:
5128 ; X64: ## %bb.0:
5129 ; X64-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0x7d,0x48,0x38,0xd9,0x01]
5130 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5131 ; X64-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x38,0xd1,0x01]
5132 ; X64-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x38,0xc1,0x01]
5133 ; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
5134 ; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
5135 ; X64-NEXT: retq ## encoding: [0xc3]
5136 %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4)
5137 %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1)
5138 %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i16 %x4)
5139 %res3 = add <16 x i32> %res, %res1
5140 %res4 = add <16 x i32> %res2, %res3
5141 ret <16 x i32> %res4
5142 }
5144 declare <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double>, <4 x double>, i32, <8 x double>, i8)
5146 define <8 x double>@test_int_x86_avx512_mask_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, <8 x double> %x3, i8 %x4) {
5147 ; X86-LABEL: test_int_x86_avx512_mask_insertf64x4_512:
5148 ; X86: ## %bb.0:
5149 ; X86-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd9,0x01]
5150 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
5151 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
5152 ; X86-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xd1,0x01]
5153 ; X86-NEXT: vaddpd %zmm3, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xd3]
5154 ; X86-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc1,0x01]
5155 ; X86-NEXT: vaddpd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc2]
5156 ; X86-NEXT: retl ## encoding: [0xc3]
5157 ;
5158 ; X64-LABEL: test_int_x86_avx512_mask_insertf64x4_512:
5159 ; X64: ## %bb.0:
5160 ; X64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd9,0x01]
5161 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5162 ; X64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xd1,0x01]
5163 ; X64-NEXT: vaddpd %zmm3, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xd3]
5164 ; X64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc1,0x01]
5165 ; X64-NEXT: vaddpd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc2]
5166 ; X64-NEXT: retq ## encoding: [0xc3]
5167 %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
5168 %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
5169 %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
5170 %res3 = fadd <8 x double> %res, %res1
5171 %res4 = fadd <8 x double> %res2, %res3
5172 ret <8 x double> %res4
5173 }
5175 declare <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64>, <4 x i64>, i32, <8 x i64>, i8)
5177 define <8 x i64>@test_int_x86_avx512_mask_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, <8 x i64> %x3, i8 %x4) {
5178 ; X86-LABEL: test_int_x86_avx512_mask_inserti64x4_512:
5179 ; X86: ## %bb.0:
5180 ; X86-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd9,0x01]
5181 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
5182 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
5183 ; X86-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xd1,0x01]
5184 ; X86-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc1,0x01]
5185 ; X86-NEXT: vpaddq %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
5186 ; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
5187 ; X86-NEXT: retl ## encoding: [0xc3]
5188 ;
5189 ; X64-LABEL: test_int_x86_avx512_mask_inserti64x4_512:
5190 ; X64: ## %bb.0:
5191 ; X64-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd9,0x01]
5192 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5193 ; X64-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xd1,0x01]
5194 ; X64-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc1,0x01]
5195 ; X64-NEXT: vpaddq %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
5196 ; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
5197 ; X64-NEXT: retq ## encoding: [0xc3]
5198 %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
5199 %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
5200 %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
5201 %res3 = add <8 x i64> %res, %res1
5202 %res4 = add <8 x i64> %res2, %res3
5203 ret <8 x i64> %res4
5204 }
5206 define <8 x i64> @test_x86_avx512_movntdqa(i8* %a0) {
5207 ; X86-LABEL: test_x86_avx512_movntdqa:
5208 ; X86: ## %bb.0:
5209 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
5210 ; X86-NEXT: vmovntdqa (%eax), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x2a,0x00]
5211 ; X86-NEXT: retl ## encoding: [0xc3]
5212 ;
5213 ; X64-LABEL: test_x86_avx512_movntdqa:
5214 ; X64: ## %bb.0:
5215 ; X64-NEXT: vmovntdqa (%rdi), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x2a,0x07]
5216 ; X64-NEXT: retq ## encoding: [0xc3]
5217 %res = call <8 x i64> @llvm.x86.avx512.movntdqa(i8* %a0)
5218 ret <8 x i64> %res
5219 }
5221 declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*) nounwind readonly
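; The cmp/ucmp tests below sweep all eight AVX-512 comparison immediates:
; 0=eq, 1=lt, 2=le, 3=false, 4=ne, 5=nlt, 6=nle, 7=true (unsigned for ucmp).
; The two constant predicates explain the gaps in the generated code: imm 3
; always yields 0, so element 3 is never inserted into the zeroed vector, and
; imm 7 yields all-ones (vpcmpeqd/vpblendw for the i16 results, movl $255 for
; the i8 results). Under a mask, imm 7 returns the mask itself, which is why
; the mask register is inserted directly at element 7. As a sketch (not part
; of the test), the imm-4 case with an all-ones mask is equivalent to:
;   %c = icmp ne <16 x i32> %a0, %a1
;   %r = bitcast <16 x i1> %c to i16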
5223 define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
5224 ; CHECK-LABEL: test_cmp_d_512:
5225 ; CHECK: ## %bb.0:
5226 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc1]
5227 ; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x66,0xc8]
5228 ; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xd1,0x02]
5229 ; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xd9,0x04]
5230 ; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xe1,0x05]
5231 ; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k5 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xe9]
5232 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
5233 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
5234 ; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
5235 ; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
5236 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5237 ; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
5238 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5239 ; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
5240 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5241 ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
5242 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5243 ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
5244 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5245 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
5246 ; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
5247 ; CHECK-NEXT: ## xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
5248 ; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
5249 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
5250 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
5251 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5252 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
5253 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5254 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
5255 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5256 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
5257 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5258 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
5259 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5260 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
5261 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5262 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
5263 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5264 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
5265 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5266 ret <8 x i16> %vec7
5267 }
5269 define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
5270 ; X86-LABEL: test_mask_cmp_d_512:
5271 ; X86: ## %bb.0:
5272 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
5273 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
5274 ; X86-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x76,0xc1]
5275 ; X86-NEXT: vpcmpgtd %zmm0, %zmm1, %k2 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x66,0xd0]
5276 ; X86-NEXT: vpcmpled %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xd9,0x02]
5277 ; X86-NEXT: vpcmpneqd %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe1,0x04]
5278 ; X86-NEXT: vpcmpnltd %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe9,0x05]
5279 ; X86-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x66,0xc9]
5280 ; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
5281 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
5282 ; X86-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
5283 ; X86-NEXT: kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
5284 ; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
5285 ; X86-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
5286 ; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
5287 ; X86-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
5288 ; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
5289 ; X86-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
5290 ; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
5291 ; X86-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
5292 ; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
5293 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
5294 ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
5295 ; X86-NEXT: retl ## encoding: [0xc3]
5296 ;
5297 ; X64-LABEL: test_mask_cmp_d_512:
5298 ; X64: ## %bb.0:
5299 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5300 ; X64-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x76,0xc1]
5301 ; X64-NEXT: vpcmpgtd %zmm0, %zmm1, %k2 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x66,0xd0]
5302 ; X64-NEXT: vpcmpled %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xd9,0x02]
5303 ; X64-NEXT: vpcmpneqd %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe1,0x04]
5304 ; X64-NEXT: vpcmpnltd %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe9,0x05]
5305 ; X64-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x66,0xc9]
5306 ; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
5307 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
5308 ; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
5309 ; X64-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
5310 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5311 ; X64-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
5312 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5313 ; X64-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
5314 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5315 ; X64-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
5316 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5317 ; X64-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
5318 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5319 ; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
5320 ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
5321 ; X64-NEXT: retq ## encoding: [0xc3]
5322 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
5323 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5324 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
5325 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5326 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
5327 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5328 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
5329 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5330 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
5331 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5332 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
5333 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5334 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
5335 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5336 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
5337 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5338 ret <8 x i16> %vec7
5339 }
5341 declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
5343 define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
5344 ; CHECK-LABEL: test_ucmp_d_512:
5345 ; CHECK: ## %bb.0:
5346 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc1]
5347 ; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xc9,0x01]
5348 ; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xd1,0x02]
5349 ; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xd9,0x04]
5350 ; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xe1,0x05]
5351 ; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xe9,0x06]
5352 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
5353 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
5354 ; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
5355 ; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
5356 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5357 ; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
5358 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5359 ; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
5360 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5361 ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
5362 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5363 ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
5364 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5365 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
5366 ; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
5367 ; CHECK-NEXT: ## xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
5368 ; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
5369 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
5370 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
5371 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5372 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
5373 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5374 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
5375 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5376 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
5377 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5378 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
5379 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5380 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
5381 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5382 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
5383 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5384 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
5385 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5386 ret <8 x i16> %vec7
5387 }
5389 define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
5390 ; X86-LABEL: test_mask_ucmp_d_512:
5391 ; X86: ## %bb.0:
5392 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
5393 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
5394 ; X86-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x76,0xc1]
5395 ; X86-NEXT: vpcmpltud %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xd1,0x01]
5396 ; X86-NEXT: vpcmpleud %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xd9,0x02]
5397 ; X86-NEXT: vpcmpneqd %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe1,0x04]
5398 ; X86-NEXT: vpcmpnltud %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xe9,0x05]
5399 ; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xc9,0x06]
5400 ; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
5401 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
5402 ; X86-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
5403 ; X86-NEXT: kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
5404 ; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
5405 ; X86-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
5406 ; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
5407 ; X86-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
5408 ; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
5409 ; X86-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
5410 ; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
5411 ; X86-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
5412 ; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
5413 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
5414 ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
5415 ; X86-NEXT: retl ## encoding: [0xc3]
5416 ;
5417 ; X64-LABEL: test_mask_ucmp_d_512:
5418 ; X64: ## %bb.0:
5419 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5420 ; X64-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x76,0xc1]
5421 ; X64-NEXT: vpcmpltud %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xd1,0x01]
5422 ; X64-NEXT: vpcmpleud %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xd9,0x02]
5423 ; X64-NEXT: vpcmpneqd %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe1,0x04]
5424 ; X64-NEXT: vpcmpnltud %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xe9,0x05]
5425 ; X64-NEXT: vpcmpnleud %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xc9,0x06]
5426 ; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
5427 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
5428 ; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
5429 ; X64-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
5430 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5431 ; X64-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
5432 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5433 ; X64-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
5434 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5435 ; X64-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
5436 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5437 ; X64-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
5438 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5439 ; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
5440 ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
5441 ; X64-NEXT: retq ## encoding: [0xc3]
5442 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
5443 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5444 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
5445 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5446 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
5447 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5448 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
5449 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5450 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
5451 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5452 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
5453 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5454 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
5455 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5456 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
5457 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5458 ret <8 x i16> %vec7
5459 }
5461 declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
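; The q variants repeat the same predicate sweep on <8 x i64> inputs with an
; i8 mask, packing the eight i8 results with vpinsrb instead of vpinsrw.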
5463 define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
5464 ; CHECK-LABEL: test_cmp_q_512:
5465 ; CHECK: ## %bb.0:
5466 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1]
5467 ; CHECK-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xc8]
5468 ; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xd1,0x02]
5469 ; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xd9,0x04]
5470 ; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xe1,0x05]
5471 ; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k5 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xe9]
5472 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
5473 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
5474 ; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
5475 ; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
5476 ; CHECK-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
5477 ; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
5478 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
5479 ; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
5480 ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
5481 ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
5482 ; CHECK-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
5483 ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
5484 ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
5485 ; CHECK-NEXT: movl $255, %eax ## encoding: [0xb8,0xff,0x00,0x00,0x00]
5486 ; CHECK-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
5487 ; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
5488 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
5489 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
5490 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
5491 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
5492 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
5493 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
5494 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
5495 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
5496 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
5497 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
5498 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
5499 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
5500 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
5501 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
5502 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
5503 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
5504 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
5505 ret <8 x i8> %vec7
5506 }
5508 define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
5509 ; X86-LABEL: test_mask_cmp_q_512:
5510 ; X86: ## %bb.0:
5511 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
5512 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
5513 ; X86-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
5514 ; X86-NEXT: vpcmpgtq %zmm0, %zmm1, %k2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x37,0xd0]
5515 ; X86-NEXT: vpcmpleq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd9,0x02]
5516 ; X86-NEXT: vpcmpneqq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe1,0x04]
5517 ; X86-NEXT: vpcmpnltq %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe9,0x05]
5518 ; X86-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0xc9]
5519 ; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
5520 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
5521 ; X86-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
5522 ; X86-NEXT: kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
5523 ; X86-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
5524 ; X86-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
5525 ; X86-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
5526 ; X86-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
5527 ; X86-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
5528 ; X86-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
5529 ; X86-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
5530 ; X86-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
5531 ; X86-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
5532 ; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
5533 ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
5534 ; X86-NEXT: retl ## encoding: [0xc3]
5535 ;
5536 ; X64-LABEL: test_mask_cmp_q_512:
5537 ; X64: ## %bb.0:
5538 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5539 ; X64-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
5540 ; X64-NEXT: vpcmpgtq %zmm0, %zmm1, %k2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x37,0xd0]
5541 ; X64-NEXT: vpcmpleq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd9,0x02]
5542 ; X64-NEXT: vpcmpneqq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe1,0x04]
5543 ; X64-NEXT: vpcmpnltq %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe9,0x05]
5544 ; X64-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0xc9]
5545 ; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
5546 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
5547 ; X64-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
5548 ; X64-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
5549 ; X64-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
5550 ; X64-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
5551 ; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
5552 ; X64-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
5553 ; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
5554 ; X64-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
5555 ; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
5556 ; X64-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
5557 ; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
5558 ; X64-NEXT: vpinsrb $7, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07]
5559 ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
5560 ; X64-NEXT: retq ## encoding: [0xc3]
5561 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
5562 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
5563 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
5564 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
5565 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
5566 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
5567 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
5568 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
5569 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
5570 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
5571 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
5572 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
5573 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
5574 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
5575 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
5576 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
5577 ret <8 x i8> %vec7
5578 }
5580 declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
5582 define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
5583 ; CHECK-LABEL: test_ucmp_q_512:
5584 ; CHECK: ## %bb.0:
5585 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1]
5586 ; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc9,0x01]
5587 ; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xd1,0x02]
5588 ; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xd9,0x04]
5589 ; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xe1,0x05]
5590 ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xe9,0x06]
5591 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
5592 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
5593 ; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
5594 ; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
5595 ; CHECK-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
5596 ; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
5597 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
5598 ; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
5599 ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
5600 ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
5601 ; CHECK-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
5602 ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
5603 ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
5604 ; CHECK-NEXT: movl $255, %eax ## encoding: [0xb8,0xff,0x00,0x00,0x00]
5605 ; CHECK-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
5606 ; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
5607 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
5608 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
5609 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
5610 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
5611 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
5612 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
5613 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
5614 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
5615 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
5616 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
5617 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
5618 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
5619 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
5620 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
5621 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
5622 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
5623 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
5624 ret <8 x i8> %vec7
5625 }
5627 define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
5628 ; X86-LABEL: test_mask_ucmp_q_512:
5629 ; X86: ## %bb.0:
5630 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
5631 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
5632 ; X86-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
5633 ; X86-NEXT: vpcmpltuq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd1,0x01]
5634 ; X86-NEXT: vpcmpleuq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd9,0x02]
5635 ; X86-NEXT: vpcmpneqq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe1,0x04]
5636 ; X86-NEXT: vpcmpnltuq %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xe9,0x05]
5637 ; X86-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc9,0x06]
5638 ; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
5639 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
5640 ; X86-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
5641 ; X86-NEXT: kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
5642 ; X86-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
5643 ; X86-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
5644 ; X86-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
5645 ; X86-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
5646 ; X86-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
5647 ; X86-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
5648 ; X86-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
5649 ; X86-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
5650 ; X86-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
5651 ; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
5652 ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
5653 ; X86-NEXT: retl ## encoding: [0xc3]
5654 ;
5655 ; X64-LABEL: test_mask_ucmp_q_512:
5656 ; X64: ## %bb.0:
5657 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5658 ; X64-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
5659 ; X64-NEXT: vpcmpltuq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd1,0x01]
5660 ; X64-NEXT: vpcmpleuq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd9,0x02]
5661 ; X64-NEXT: vpcmpneqq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe1,0x04]
5662 ; X64-NEXT: vpcmpnltuq %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xe9,0x05]
5663 ; X64-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc9,0x06]
5664 ; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
5665 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
5666 ; X64-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
5667 ; X64-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
5668 ; X64-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
5669 ; X64-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
5670 ; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
5671 ; X64-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
5672 ; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
5673 ; X64-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
5674 ; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
5675 ; X64-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
5676 ; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
5677 ; X64-NEXT: vpinsrb $7, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07]
5678 ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
5679 ; X64-NEXT: retq ## encoding: [0xc3]
5680 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
5681 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
5682 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
5683 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
5684 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
5685 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
5686 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
5687 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
5688 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
5689 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
5690 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
5691 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
5692 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
5693 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
5694 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
5695 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
5696 ret <8 x i8> %vec7
5697 }
5699 declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
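; In the broadcast tests below, a register source lowers to vinsert shuffles
; (vinsertf128/vinserti128 plus vinsertf64x4/vinserti64x4), while the _load
; variants fold the memory operand into a single masked vbroadcast*x4
; instruction. A 32x4 register broadcast is equivalent to this generic
; shuffle (sketch, not part of the test):
;   %b = shufflevector <4 x float> %x0, <4 x float> undef,
;        <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3,
;                    i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>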
5701 declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float>, <16 x float>, i16)
5703 define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512(<4 x float> %x0, <16 x float> %x2, i16 %mask) {
5704 ; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512:
5705 ; X86: ## %bb.0:
5706 ; X86-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
5707 ; X86-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
5708 ; X86-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xc0,0x01]
5709 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
5710 ; X86-NEXT: vmovaps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0xc8]
5711 ; X86-NEXT: vmovaps %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0xd0]
5712 ; X86-NEXT: vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
5713 ; X86-NEXT: vaddps %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
5714 ; X86-NEXT: retl ## encoding: [0xc3]
5715 ;
5716 ; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512:
5717 ; X64: ## %bb.0:
5718 ; X64-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
5719 ; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
5720 ; X64-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xc0,0x01]
5721 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5722 ; X64-NEXT: vmovaps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0xc8]
5723 ; X64-NEXT: vmovaps %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0xd0]
5724 ; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
5725 ; X64-NEXT: vaddps %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
5726 ; X64-NEXT: retq ## encoding: [0xc3]
5728 %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 -1)
5729 %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 %mask)
5730 %res3 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %mask)
5731 %res4 = fadd <16 x float> %res1, %res2
5732 %res5 = fadd <16 x float> %res3, %res4
5733 ret <16 x float> %res5
5734 }
5736 define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512_load(<4 x float>* %x0ptr, <16 x float> %x2, i16 %mask) {
5737 ; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512_load:
5738 ; X86: ## %bb.0:
5739 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
5740 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
5741 ; X86-NEXT: vbroadcastf32x4 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x1a,0x00]
5742 ; X86-NEXT: ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
5743 ; X86-NEXT: retl ## encoding: [0xc3]
5744 ;
5745 ; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512_load:
5746 ; X64: ## %bb.0:
5747 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
5748 ; X64-NEXT: vbroadcastf32x4 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x1a,0x07]
5749 ; X64-NEXT: ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
5750 ; X64-NEXT: retq ## encoding: [0xc3]
5751 %x0 = load <4 x float>, <4 x float>* %x0ptr
5752 %res = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 %mask)
5753 ret <16 x float> %res
5754 }
5756 declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double>, <8 x double>, i8)
5758 define <8 x double>@test_int_x86_avx512_mask_broadcastf64x4_512(<4 x double> %x0, <8 x double> %x2, i8 %mask) {
5759 ; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512:
5760 ; X86: ## %bb.0:
5761 ; X86-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
5762 ; X86-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd0,0x01]
5763 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
5764 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
5765 ; X86-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xc8,0x01]
5766 ; X86-NEXT: vaddpd %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc9]
5767 ; X86-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc0,0x01]
5768 ; X86-NEXT: vaddpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc1]
5769 ; X86-NEXT: retl ## encoding: [0xc3]
5770 ;
5771 ; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512:
5772 ; X64: ## %bb.0:
5773 ; X64-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
5774 ; X64-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd0,0x01]
5775 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5776 ; X64-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xc8,0x01]
5777 ; X64-NEXT: vaddpd %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc9]
5778 ; X64-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc0,0x01]
5779 ; X64-NEXT: vaddpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc1]
5780 ; X64-NEXT: retq ## encoding: [0xc3]
5782 %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 -1)
5783 %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 %mask)
5784 %res3 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> zeroinitializer, i8 %mask)
5785 %res4 = fadd <8 x double> %res1, %res2
5786 %res5 = fadd <8 x double> %res3, %res4
5787 ret <8 x double> %res5
5788 }
5790 define <8 x double>@test_int_x86_avx512_mask_broadcastf64x4_512_load(<4 x double>* %x0ptr, <8 x double> %x2, i8 %mask) {
5791 ; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512_load:
5792 ; X86: ## %bb.0:
5793 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
5794 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
5795 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
5796 ; X86-NEXT: vbroadcastf64x4 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x1b,0x00]
5797 ; X86-NEXT: ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
5798 ; X86-NEXT: retl ## encoding: [0xc3]
5799 ;
5800 ; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512_load:
5801 ; X64: ## %bb.0:
5802 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
5803 ; X64-NEXT: vbroadcastf64x4 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x1b,0x07]
5804 ; X64-NEXT: ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
5805 ; X64-NEXT: retq ## encoding: [0xc3]
5807 %x0 = load <4 x double>, <4 x double>* %x0ptr
5808 %res = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 %mask)
5809 ret <8 x double> %res
5810 }
5812 declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32>, <16 x i32>, i16)
5814 define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x4_512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask) {
5815 ; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512:
5816 ; X86: ## %bb.0:
5817 ; X86-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
5818 ; X86-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
5819 ; X86-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xc0,0x01]
5820 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
5821 ; X86-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0xc8]
5822 ; X86-NEXT: vmovdqa32 %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xd0]
5823 ; X86-NEXT: vpaddd %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xca]
5824 ; X86-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
5825 ; X86-NEXT: retl ## encoding: [0xc3]
5826 ;
5827 ; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512:
5828 ; X64: ## %bb.0:
5829 ; X64-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
5830 ; X64-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
5831 ; X64-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xc0,0x01]
5832 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5833 ; X64-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0xc8]
5834 ; X64-NEXT: vmovdqa32 %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xd0]
5835 ; X64-NEXT: vpaddd %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xca]
5836 ; X64-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
5837 ; X64-NEXT: retq ## encoding: [0xc3]
5839 %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
5840 %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask)
5841 %res3 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
5842 %res4 = add <16 x i32> %res1, %res2
5843 %res5 = add <16 x i32> %res3, %res4
5844 ret <16 x i32> %res5
5847 define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x4_512_load(<4 x i32>* %x0ptr, <16 x i32> %x2, i16 %mask) {
5848 ; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512_load:
5850 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
5851 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
5852 ; X86-NEXT: vbroadcasti32x4 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x5a,0x00]
5853 ; X86-NEXT: ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
5854 ; X86-NEXT: retl ## encoding: [0xc3]
5856 ; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512_load:
5858 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
5859 ; X64-NEXT: vbroadcasti32x4 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x5a,0x07]
5860 ; X64-NEXT: ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
5861 ; X64-NEXT: retq ## encoding: [0xc3]
5863 %x0 = load <4 x i32>, <4 x i32>* %x0ptr
5864 %res = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask)
5865 ret <16 x i32> %res
5868 declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64>, <8 x i64>, i8)
5870 define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x4_512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask) {
5871 ; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512:
5873 ; X86-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
5874 ; X86-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd0,0x01]
5875 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
5876 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
5877 ; X86-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xc8,0x01]
5878 ; X86-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc0,0x01]
5879 ; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
5880 ; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
5881 ; X86-NEXT: retl ## encoding: [0xc3]
5883 ; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512:
5885 ; X64-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
5886 ; X64-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd0,0x01]
5887 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5888 ; X64-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xc8,0x01]
5889 ; X64-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc0,0x01]
5890 ; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
5891 ; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
5892 ; X64-NEXT: retq ## encoding: [0xc3]
5894 %res1 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 -1)
5895 %res2 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask)
5896 %res3 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> zeroinitializer, i8 %mask)
5897 %res4 = add <8 x i64> %res1, %res2
5898 %res5 = add <8 x i64> %res3, %res4
5899 ret <8 x i64> %res5
5902 define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x4_512_load(<4 x i64>* %x0ptr, <8 x i64> %x2, i8 %mask) {
5903 ; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512_load:
5905 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
5906 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
5907 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
5908 ; X86-NEXT: vbroadcasti64x4 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x5b,0x00]
5909 ; X86-NEXT: ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
5910 ; X86-NEXT: retl ## encoding: [0xc3]
5912 ; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512_load:
5914 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
5915 ; X64-NEXT: vbroadcasti64x4 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x5b,0x07]
5916 ; X64-NEXT: ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
5917 ; X64-NEXT: retq ## encoding: [0xc3]
5919 %x0 = load <4 x i64>, <4 x i64>* %x0ptr
5920 %res = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask)
5921 ret <8 x i64> %res
5924 declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)
5926 define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
5927 ; X86-LABEL: test_int_x86_avx512_mask_pabs_d_512:
5929 ; X86-NEXT: vpabsd %zmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x1e,0xd0]
5930 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
5931 ; X86-NEXT: vpabsd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x1e,0xc8]
5932 ; X86-NEXT: vpaddd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc2]
5933 ; X86-NEXT: retl ## encoding: [0xc3]
5935 ; X64-LABEL: test_int_x86_avx512_mask_pabs_d_512:
5937 ; X64-NEXT: vpabsd %zmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x1e,0xd0]
5938 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5939 ; X64-NEXT: vpabsd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x1e,0xc8]
5940 ; X64-NEXT: vpaddd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc2]
5941 ; X64-NEXT: retq ## encoding: [0xc3]
5942 %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
5943 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1)
5944 %res2 = add <16 x i32> %res, %res1
5945 ret <16 x i32> %res2
5948 declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)
5950 define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
5951 ; X86-LABEL: test_int_x86_avx512_mask_pabs_q_512:
5953 ; X86-NEXT: vpabsq %zmm0, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xd0]
5954 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
5955 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
5956 ; X86-NEXT: vpabsq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x1f,0xc8]
5957 ; X86-NEXT: vpaddq %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc2]
5958 ; X86-NEXT: retl ## encoding: [0xc3]
5960 ; X64-LABEL: test_int_x86_avx512_mask_pabs_q_512:
5962 ; X64-NEXT: vpabsq %zmm0, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xd0]
5963 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5964 ; X64-NEXT: vpabsq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x1f,0xc8]
5965 ; X64-NEXT: vpaddq %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc2]
5966 ; X64-NEXT: retq ## encoding: [0xc3]
5967 %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
5968 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1)
5969 %res2 = add <8 x i64> %res, %res1
5970 ret <8 x i64> %res2
5973 define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1, i8 %m) {
5974 ; X86-LABEL: test_vptestmq:
5976 ; X86-NEXT: vptestmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
5977 ; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
5978 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
5979 ; X86-NEXT: andb %cl, %al ## encoding: [0x20,0xc8]
5980 ; X86-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
5981 ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
5982 ; X86-NEXT: retl ## encoding: [0xc3]
5984 ; X64-LABEL: test_vptestmq:
5986 ; X64-NEXT: vptestmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
5987 ; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
5988 ; X64-NEXT: andb %al, %dil ## encoding: [0x40,0x20,0xc7]
5989 ; X64-NEXT: addb %dil, %al ## encoding: [0x40,0x00,0xf8]
5990 ; X64-NEXT: ## kill: def $al killed $al killed $eax
5991 ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
5992 ; X64-NEXT: retq ## encoding: [0xc3]
5993 %res = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
5994 %res1 = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 %m)
5995 %res2 = add i8 %res1, %res
5996 ret i8 %res2
5998 declare i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64>, <8 x i64>, i8)
6000 define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1, i16 %m) {
6001 ; X86-LABEL: test_vptestmd:
6003 ; X86-NEXT: vptestmd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
6004 ; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
6005 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
6006 ; X86-NEXT: andw %cx, %ax ## encoding: [0x66,0x21,0xc8]
6007 ; X86-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
6008 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
6009 ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
6010 ; X86-NEXT: retl ## encoding: [0xc3]
6012 ; X64-LABEL: test_vptestmd:
6014 ; X64-NEXT: vptestmd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
6015 ; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
6016 ; X64-NEXT: andl %eax, %edi ## encoding: [0x21,0xc7]
6017 ; X64-NEXT: addl %edi, %eax ## encoding: [0x01,0xf8]
6018 ; X64-NEXT: ## kill: def $ax killed $ax killed $eax
6019 ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
6020 ; X64-NEXT: retq ## encoding: [0xc3]
6021 %res = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
6022 %res1 = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 %m)
6023 %res2 = add i16 %res1, %res
6024 ret i16 %res2
6026 declare i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32>, <16 x i32>, i16)
6028 declare i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32>, <16 x i32>, i16 %x2)
6030 define i16@test_int_x86_avx512_ptestnm_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
6031 ; X86-LABEL: test_int_x86_avx512_ptestnm_d_512:
6033 ; X86-NEXT: vptestnmd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc1]
6034 ; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
6035 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
6036 ; X86-NEXT: andw %cx, %ax ## encoding: [0x66,0x21,0xc8]
6037 ; X86-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
6038 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
6039 ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
6040 ; X86-NEXT: retl ## encoding: [0xc3]
6042 ; X64-LABEL: test_int_x86_avx512_ptestnm_d_512:
6044 ; X64-NEXT: vptestnmd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc1]
6045 ; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
6046 ; X64-NEXT: andl %eax, %edi ## encoding: [0x21,0xc7]
6047 ; X64-NEXT: addl %edi, %eax ## encoding: [0x01,0xf8]
6048 ; X64-NEXT: ## kill: def $ax killed $ax killed $eax
6049 ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
6050 ; X64-NEXT: retq ## encoding: [0xc3]
6051 %res = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
6052 %res1 = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1)
6053 %res2 = add i16 %res, %res1
6054 ret i16 %res2
6057 declare i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64>, <8 x i64>, i8 %x2)
6059 define i8@test_int_x86_avx512_ptestnm_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
6060 ; X86-LABEL: test_int_x86_avx512_ptestnm_q_512:
6062 ; X86-NEXT: vptestnmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc1]
6063 ; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
6064 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
6065 ; X86-NEXT: andb %cl, %al ## encoding: [0x20,0xc8]
6066 ; X86-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
6067 ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
6068 ; X86-NEXT: retl ## encoding: [0xc3]
6070 ; X64-LABEL: test_int_x86_avx512_ptestnm_q_512:
6072 ; X64-NEXT: vptestnmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc1]
6073 ; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
6074 ; X64-NEXT: andb %al, %dil ## encoding: [0x40,0x20,0xc7]
6075 ; X64-NEXT: addb %dil, %al ## encoding: [0x40,0x00,0xf8]
6076 ; X64-NEXT: ## kill: def $al killed $al killed $eax
6077 ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
6078 ; X64-NEXT: retq ## encoding: [0xc3]
6079 %res = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
6080 %res1 = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1)
6081 %res2 = add i8 %res, %res1
6082 ret i8 %res2
6085 declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
6086 define i16 @test_kand(i16 %a0, i16 %a1) {
6087 ; X86-LABEL: test_kand:
6089 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
6090 ; X86-NEXT: andw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x23,0x44,0x24,0x08]
6091 ; X86-NEXT: andl $8, %eax ## encoding: [0x83,0xe0,0x08]
6092 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
6093 ; X86-NEXT: retl ## encoding: [0xc3]
6095 ; X64-LABEL: test_kand:
6097 ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
6098 ; X64-NEXT: andl %esi, %eax ## encoding: [0x21,0xf0]
6099 ; X64-NEXT: andl $8, %eax ## encoding: [0x83,0xe0,0x08]
6100 ; X64-NEXT: ## kill: def $ax killed $ax killed $eax
6101 ; X64-NEXT: retq ## encoding: [0xc3]
6102 %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
6103 %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
6104 ret i16 %t2
6107 declare i16 @llvm.x86.avx512.kandn.w(i16, i16) nounwind readnone
6108 define i16 @test_kandn(i16 %a0, i16 %a1) {
6109 ; X86-LABEL: test_kandn:
6111 ; X86-NEXT: movl $65527, %eax ## encoding: [0xb8,0xf7,0xff,0x00,0x00]
6112 ; X86-NEXT: ## imm = 0xFFF7
6113 ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax ## encoding: [0x0b,0x44,0x24,0x04]
6114 ; X86-NEXT: andw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x23,0x44,0x24,0x08]
6115 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
6116 ; X86-NEXT: retl ## encoding: [0xc3]
6118 ; X64-LABEL: test_kandn:
6120 ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
6121 ; X64-NEXT: orl $-9, %eax ## encoding: [0x83,0xc8,0xf7]
6122 ; X64-NEXT: andl %esi, %eax ## encoding: [0x21,0xf0]
6123 ; X64-NEXT: ## kill: def $ax killed $ax killed $eax
6124 ; X64-NEXT: retq ## encoding: [0xc3]
6125 %t1 = call i16 @llvm.x86.avx512.kandn.w(i16 %a0, i16 8)
6126 %t2 = call i16 @llvm.x86.avx512.kandn.w(i16 %t1, i16 %a1)
6127 ret i16 %t2
6130 declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
6131 define i16 @test_knot(i16 %a0) {
6132 ; X86-LABEL: test_knot:
6134 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
6135 ; X86-NEXT: notl %eax ## encoding: [0xf7,0xd0]
6136 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
6137 ; X86-NEXT: retl ## encoding: [0xc3]
6139 ; X64-LABEL: test_knot:
6141 ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
6142 ; X64-NEXT: notl %eax ## encoding: [0xf7,0xd0]
6143 ; X64-NEXT: ## kill: def $ax killed $ax killed $eax
6144 ; X64-NEXT: retq ## encoding: [0xc3]
6145 %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
6146 ret i16 %res
6149 declare i16 @llvm.x86.avx512.kor.w(i16, i16) nounwind readnone
6150 define i16 @test_kor(i16 %a0, i16 %a1) {
6151 ; X86-LABEL: test_kor:
6153 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
6154 ; X86-NEXT: orw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x0b,0x44,0x24,0x08]
6155 ; X86-NEXT: orl $8, %eax ## encoding: [0x83,0xc8,0x08]
6156 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
6157 ; X86-NEXT: retl ## encoding: [0xc3]
6159 ; X64-LABEL: test_kor:
6161 ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
6162 ; X64-NEXT: orl %esi, %eax ## encoding: [0x09,0xf0]
6163 ; X64-NEXT: orl $8, %eax ## encoding: [0x83,0xc8,0x08]
6164 ; X64-NEXT: ## kill: def $ax killed $ax killed $eax
6165 ; X64-NEXT: retq ## encoding: [0xc3]
6166 %t1 = call i16 @llvm.x86.avx512.kor.w(i16 %a0, i16 8)
6167 %t2 = call i16 @llvm.x86.avx512.kor.w(i16 %t1, i16 %a1)
6168 ret i16 %t2
6171 declare i16 @llvm.x86.avx512.kxnor.w(i16, i16) nounwind readnone
6172 ; TODO: the two kxnor instructions here are a no-op and should be eliminated,
6173 ; probably by FoldConstantArithmetic in SelectionDAG.
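; (Added note, assuming the documented KXNORW semantics kxnor(a, b) = ~(a ^ b):
; kxnor(kxnor(a0, 8), a1) = ~(~(a0 ^ 8) ^ a1) = (a0 ^ 8) ^ a1, so the two
; complements cancel, which is why the checks below show plain xor instructions.)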
6174 define i16 @test_kxnor(i16 %a0, i16 %a1) {
6175 ; X86-LABEL: test_kxnor:
6177 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
6178 ; X86-NEXT: xorw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x33,0x44,0x24,0x08]
6179 ; X86-NEXT: xorl $8, %eax ## encoding: [0x83,0xf0,0x08]
6180 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
6181 ; X86-NEXT: retl ## encoding: [0xc3]
6183 ; X64-LABEL: test_kxnor:
6185 ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
6186 ; X64-NEXT: xorl %esi, %eax ## encoding: [0x31,0xf0]
6187 ; X64-NEXT: xorl $8, %eax ## encoding: [0x83,0xf0,0x08]
6188 ; X64-NEXT: ## kill: def $ax killed $ax killed $eax
6189 ; X64-NEXT: retq ## encoding: [0xc3]
6190 %t1 = call i16 @llvm.x86.avx512.kxnor.w(i16 %a0, i16 8)
6191 %t2 = call i16 @llvm.x86.avx512.kxnor.w(i16 %t1, i16 %a1)
6192 ret i16 %t2
6195 declare i16 @llvm.x86.avx512.kxor.w(i16, i16) nounwind readnone
6196 define i16 @test_kxor(i16 %a0, i16 %a1) {
6197 ; X86-LABEL: test_kxor:
6199 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
6200 ; X86-NEXT: xorw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x33,0x44,0x24,0x08]
6201 ; X86-NEXT: xorl $8, %eax ## encoding: [0x83,0xf0,0x08]
6202 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
6203 ; X86-NEXT: retl ## encoding: [0xc3]
6205 ; X64-LABEL: test_kxor:
6207 ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
6208 ; X64-NEXT: xorl %esi, %eax ## encoding: [0x31,0xf0]
6209 ; X64-NEXT: xorl $8, %eax ## encoding: [0x83,0xf0,0x08]
6210 ; X64-NEXT: ## kill: def $ax killed $ax killed $eax
6211 ; X64-NEXT: retq ## encoding: [0xc3]
6212 %t1 = call i16 @llvm.x86.avx512.kxor.w(i16 %a0, i16 8)
6213 %t2 = call i16 @llvm.x86.avx512.kxor.w(i16 %t1, i16 %a1)
6214 ret i16 %t2
6217 declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
6218 define i32 @test_kortestz(<8 x i64> %A, <8 x i64> %B, <8 x i64> %C, <8 x i64> %D) {
6219 ; CHECK-LABEL: test_kortestz:
6220 ; CHECK: ## %bb.0: ## %entry
6221 ; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc1,0x04]
6222 ; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf3,0x6d,0x48,0x1f,0xcb,0x04]
6223 ; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
6224 ; CHECK-NEXT: kortestw %k1, %k0 ## encoding: [0xc5,0xf8,0x98,0xc1]
6225 ; CHECK-NEXT: sete %al ## encoding: [0x0f,0x94,0xc0]
6226 ; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
6227 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
6229 %0 = bitcast <8 x i64> %A to <16 x i32>
6230 %1 = bitcast <8 x i64> %B to <16 x i32>
6231 %2 = icmp ne <16 x i32> %0, %1
6232 %3 = bitcast <8 x i64> %C to <16 x i32>
6233 %4 = bitcast <8 x i64> %D to <16 x i32>
6234 %5 = icmp ne <16 x i32> %3, %4
6235 %6 = bitcast <16 x i1> %2 to i16
6236 %7 = bitcast <16 x i1> %5 to i16
6237 %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %6, i16 %7)
6238 ret i32 %res
6241 declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
6242 define i32 @test_kortestc(<8 x i64> %A, <8 x i64> %B, <8 x i64> %C, <8 x i64> %D) {
6243 ; CHECK-LABEL: test_kortestc:
6244 ; CHECK: ## %bb.0: ## %entry
6245 ; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc1,0x04]
6246 ; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf3,0x6d,0x48,0x1f,0xcb,0x04]
6247 ; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
6248 ; CHECK-NEXT: kortestw %k1, %k0 ## encoding: [0xc5,0xf8,0x98,0xc1]
6249 ; CHECK-NEXT: sete %al ## encoding: [0x0f,0x94,0xc0]
6250 ; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
6251 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
6253 %0 = bitcast <8 x i64> %A to <16 x i32>
6254 %1 = bitcast <8 x i64> %B to <16 x i32>
6255 %2 = icmp ne <16 x i32> %0, %1
6256 %3 = bitcast <8 x i64> %C to <16 x i32>
6257 %4 = bitcast <8 x i64> %D to <16 x i32>
6258 %5 = icmp ne <16 x i32> %3, %4
6259 %6 = bitcast <16 x i1> %2 to i16
6260 %7 = bitcast <16 x i1> %5 to i16
6261 %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %6, i16 %7)
6262 ret i32 %res
6265 define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
6266 ; CHECK-LABEL: test_cmpps:
6268 ; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02]
6269 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
6270 ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
6271 ; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
6272 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
6273 %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
6274 ret i16 %res
6276 declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
6278 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
6279 ; CHECK-LABEL: test_cmppd:
6281 ; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04]
6282 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
6283 ; CHECK-NEXT: ## kill: def $al killed $al killed $eax
6284 ; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
6285 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
6286 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
6287 ret i8 %res
6289 declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
6291 define <8 x i64> @test_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
6292 ; CHECK-LABEL: test_mul_epi32_rr:
6294 ; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
6295 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
6296 %res = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
6297 ret <8 x i64> %res
6300 define <8 x i64> @test_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
6301 ; X86-LABEL: test_mul_epi32_rrk:
6303 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
6304 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
6305 ; X86-NEXT: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
6306 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
6307 ; X86-NEXT: retl ## encoding: [0xc3]
6309 ; X64-LABEL: test_mul_epi32_rrk:
6311 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6312 ; X64-NEXT: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
6313 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
6314 ; X64-NEXT: retq ## encoding: [0xc3]
6315 %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
6316 %mask.cast = bitcast i8 %mask to <8 x i1>
6317 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
6318 ret <8 x i64> %res
6321 define <8 x i64> @test_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
6322 ; X86-LABEL: test_mul_epi32_rrkz:
6324 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
6325 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
6326 ; X86-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
6327 ; X86-NEXT: retl ## encoding: [0xc3]
6329 ; X64-LABEL: test_mul_epi32_rrkz:
6331 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6332 ; X64-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
6333 ; X64-NEXT: retq ## encoding: [0xc3]
6334 %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
6335 %mask.cast = bitcast i8 %mask to <8 x i1>
6336 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
6337 ret <8 x i64> %res
6340 define <8 x i64> @test_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
6341 ; X86-LABEL: test_mul_epi32_rm:
6343 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
6344 ; X86-NEXT: vpmuldq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x00]
6345 ; X86-NEXT: retl ## encoding: [0xc3]
6347 ; X64-LABEL: test_mul_epi32_rm:
6349 ; X64-NEXT: vpmuldq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x07]
6350 ; X64-NEXT: retq ## encoding: [0xc3]
6351 %b = load <16 x i32>, <16 x i32>* %ptr_b
6352 %res = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
6353 ret <8 x i64> %res
6356 define <8 x i64> @test_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
6357 ; X86-LABEL: test_mul_epi32_rmk:
6359 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
6360 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
6361 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
6362 ; X86-NEXT: vpmuldq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x08]
6363 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
6364 ; X86-NEXT: retl ## encoding: [0xc3]
6366 ; X64-LABEL: test_mul_epi32_rmk:
6368 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
6369 ; X64-NEXT: vpmuldq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x0f]
6370 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
6371 ; X64-NEXT: retq ## encoding: [0xc3]
6372 %b = load <16 x i32>, <16 x i32>* %ptr_b
6373 %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
6374 %mask.cast = bitcast i8 %mask to <8 x i1>
6375 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
6376 ret <8 x i64> %res
6379 define <8 x i64> @test_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
6380 ; X86-LABEL: test_mul_epi32_rmkz:
6382 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
6383 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
6384 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
6385 ; X86-NEXT: vpmuldq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x00]
6386 ; X86-NEXT: retl ## encoding: [0xc3]
6388 ; X64-LABEL: test_mul_epi32_rmkz:
6390 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
6391 ; X64-NEXT: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x07]
6392 ; X64-NEXT: retq ## encoding: [0xc3]
6393 %b = load <16 x i32>, <16 x i32>* %ptr_b
6394 %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
6395 %mask.cast = bitcast i8 %mask to <8 x i1>
6396 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
6397 ret <8 x i64> %res
6400 define <8 x i64> @test_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
6401 ; X86-LABEL: test_mul_epi32_rmb:
6403 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
6404 ; X86-NEXT: vpbroadcastd (%eax), %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0x08]
6405 ; X86-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
6406 ; X86-NEXT: retl ## encoding: [0xc3]
6408 ; X64-LABEL: test_mul_epi32_rmb:
6410 ; X64-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x07]
6411 ; X64-NEXT: retq ## encoding: [0xc3]
6412 %q = load i64, i64* %ptr_b
6413 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
6414 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
6415 %b = bitcast <8 x i64> %b64 to <16 x i32>
6416 %res = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
6417 ret <8 x i64> %res
6420 define <8 x i64> @test_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
6421 ; X86-LABEL: test_mul_epi32_rmbk:
6423 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
6424 ; X86-NEXT: vpbroadcastd (%eax), %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0x10]
6425 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
6426 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
6427 ; X86-NEXT: vpmuldq %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xca]
6428 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
6429 ; X86-NEXT: retl ## encoding: [0xc3]
6431 ; X64-LABEL: test_mul_epi32_rmbk:
6433 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
6434 ; X64-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x0f]
6435 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
6436 ; X64-NEXT: retq ## encoding: [0xc3]
6437 %q = load i64, i64* %ptr_b
6438 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
6439 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
6440 %b = bitcast <8 x i64> %b64 to <16 x i32>
6441 %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
6442 %mask.cast = bitcast i8 %mask to <8 x i1>
6443 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
6444 ret <8 x i64> %res
6447 define <8 x i64> @test_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
6448 ; X86-LABEL: test_mul_epi32_rmbkz:
6450 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
6451 ; X86-NEXT: vpbroadcastd (%eax), %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0x08]
6452 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
6453 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
6454 ; X86-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
6455 ; X86-NEXT: retl ## encoding: [0xc3]
6457 ; X64-LABEL: test_mul_epi32_rmbkz:
6459 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
6460 ; X64-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x07]
6461 ; X64-NEXT: retq ## encoding: [0xc3]
6462 %q = load i64, i64* %ptr_b
6463 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
6464 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
6465 %b = bitcast <8 x i64> %b64 to <16 x i32>
6466 %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
6467 %mask.cast = bitcast i8 %mask to <8 x i1>
6468 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
6469 ret <8 x i64> %res
6472 declare <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32>, <16 x i32>)
6474 define <8 x i64> @test_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
6475 ; CHECK-LABEL: test_mul_epu32_rr:
6477 ; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
6478 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
6479 %res = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
6480 ret <8 x i64> %res
6483 define <8 x i64> @test_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
6484 ; X86-LABEL: test_mul_epu32_rrk:
6486 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
6487 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
6488 ; X86-NEXT: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
6489 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
6490 ; X86-NEXT: retl ## encoding: [0xc3]
6492 ; X64-LABEL: test_mul_epu32_rrk:
6494 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6495 ; X64-NEXT: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
6496 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
6497 ; X64-NEXT: retq ## encoding: [0xc3]
6498 %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
6499 %mask.cast = bitcast i8 %mask to <8 x i1>
6500 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
6501 ret <8 x i64> %res
6504 define <8 x i64> @test_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
6505 ; X86-LABEL: test_mul_epu32_rrkz:
6507 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
6508 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
6509 ; X86-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
6510 ; X86-NEXT: retl ## encoding: [0xc3]
6512 ; X64-LABEL: test_mul_epu32_rrkz:
6514 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6515 ; X64-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
6516 ; X64-NEXT: retq ## encoding: [0xc3]
6517 %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
6518 %mask.cast = bitcast i8 %mask to <8 x i1>
6519 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
6520 ret <8 x i64> %res
6523 define <8 x i64> @test_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
6524 ; X86-LABEL: test_mul_epu32_rm:
6526 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
6527 ; X86-NEXT: vpmuludq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x00]
6528 ; X86-NEXT: retl ## encoding: [0xc3]
6530 ; X64-LABEL: test_mul_epu32_rm:
6532 ; X64-NEXT: vpmuludq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x07]
6533 ; X64-NEXT: retq ## encoding: [0xc3]
6534 %b = load <16 x i32>, <16 x i32>* %ptr_b
6535 %res = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
6536 ret <8 x i64> %res
6539 define <8 x i64> @test_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
6540 ; X86-LABEL: test_mul_epu32_rmk:
6542 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
6543 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
6544 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
6545 ; X86-NEXT: vpmuludq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x08]
6546 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
6547 ; X86-NEXT: retl ## encoding: [0xc3]
6549 ; X64-LABEL: test_mul_epu32_rmk:
6551 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
6552 ; X64-NEXT: vpmuludq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x0f]
6553 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
6554 ; X64-NEXT: retq ## encoding: [0xc3]
6555 %b = load <16 x i32>, <16 x i32>* %ptr_b
6556 %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
6557 %mask.cast = bitcast i8 %mask to <8 x i1>
6558 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
6559 ret <8 x i64> %res
6562 define <8 x i64> @test_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
6563 ; X86-LABEL: test_mul_epu32_rmkz:
6565 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
6566 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
6567 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
6568 ; X86-NEXT: vpmuludq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x00]
6569 ; X86-NEXT: retl ## encoding: [0xc3]
6571 ; X64-LABEL: test_mul_epu32_rmkz:
6573 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
6574 ; X64-NEXT: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x07]
6575 ; X64-NEXT: retq ## encoding: [0xc3]
6576 %b = load <16 x i32>, <16 x i32>* %ptr_b
6577 %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
6578 %mask.cast = bitcast i8 %mask to <8 x i1>
6579 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
6580 ret <8 x i64> %res
6583 define <8 x i64> @test_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
6584 ; X86-LABEL: test_mul_epu32_rmb:
6586 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
6587 ; X86-NEXT: vpbroadcastd (%eax), %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0x08]
6588 ; X86-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
6589 ; X86-NEXT: retl ## encoding: [0xc3]
6591 ; X64-LABEL: test_mul_epu32_rmb:
6593 ; X64-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x07]
6594 ; X64-NEXT: retq ## encoding: [0xc3]
6595 %q = load i64, i64* %ptr_b
6596 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
6597 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
6598 %b = bitcast <8 x i64> %b64 to <16 x i32>
6599 %res = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
6600 ret <8 x i64> %res
6603 define <8 x i64> @test_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
6604 ; X86-LABEL: test_mul_epu32_rmbk:
6606 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
6607 ; X86-NEXT: vpbroadcastd (%eax), %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0x10]
6608 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
6609 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
6610 ; X86-NEXT: vpmuludq %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xca]
6611 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
6612 ; X86-NEXT: retl ## encoding: [0xc3]
6614 ; X64-LABEL: test_mul_epu32_rmbk:
6616 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
6617 ; X64-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x0f]
6618 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
6619 ; X64-NEXT: retq ## encoding: [0xc3]
6620 %q = load i64, i64* %ptr_b
6621 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
6622 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
6623 %b = bitcast <8 x i64> %b64 to <16 x i32>
6624 %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
6625 %mask.cast = bitcast i8 %mask to <8 x i1>
6626 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
6627 ret <8 x i64> %res
6630 define <8 x i64> @test_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
6631 ; X86-LABEL: test_mul_epu32_rmbkz:
6633 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
6634 ; X86-NEXT: vpbroadcastd (%eax), %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0x08]
6635 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
6636 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
6637 ; X86-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
6638 ; X86-NEXT: retl ## encoding: [0xc3]
6640 ; X64-LABEL: test_mul_epu32_rmbkz:
6642 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
6643 ; X64-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x07]
6644 ; X64-NEXT: retq ## encoding: [0xc3]
6645 %q = load i64, i64* %ptr_b
6646 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
6647 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
6648 %b = bitcast <8 x i64> %b64 to <16 x i32>
6649 %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
6650 %mask.cast = bitcast i8 %mask to <8 x i1>
6651 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
6652 ret <8 x i64> %res
6655 declare <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32>, <16 x i32>)
6657 define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b)
6658 ; X86-LABEL: test_x86_avx512_mm_cvtu32_sd:
6660 ; X86-NEXT: vcvtusi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x7b,0x44,0x24,0x01]
6661 ; X86-NEXT: retl ## encoding: [0xc3]
6663 ; X64-LABEL: test_x86_avx512_mm_cvtu32_sd:
6665 ; X64-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x7b,0xc7]
6666 ; X64-NEXT: retq ## encoding: [0xc3]
6667 {
6668 %res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<2 x double>> [#uses=1]
6669 ret <2 x double> %res
6671 declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone
6673 define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
6674 ; X86-LABEL: test_x86_vbroadcast_ss_512:
6676 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
6677 ; X86-NEXT: vbroadcastss (%eax), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x18,0x00]
6678 ; X86-NEXT: retl ## encoding: [0xc3]
6680 ; X64-LABEL: test_x86_vbroadcast_ss_512:
6682 ; X64-NEXT: vbroadcastss (%rdi), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x18,0x07]
6683 ; X64-NEXT: retq ## encoding: [0xc3]
6684 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
6685 ret <16 x float> %res
6687 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly
6689 define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
6690 ; X86-LABEL: test_x86_vbroadcast_sd_512:
6692 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
6693 ; X86-NEXT: vbroadcastsd (%eax), %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0x00]
6694 ; X86-NEXT: retl ## encoding: [0xc3]
6696 ; X64-LABEL: test_x86_vbroadcast_sd_512:
6698 ; X64-NEXT: vbroadcastsd (%rdi), %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0x07]
6699 ; X64-NEXT: retq ## encoding: [0xc3]
6700 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
6701 ret <8 x double> %res
6703 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly
6705 declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8)
6707 define <8 x double>@test_int_x86_avx512_mask_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
6708 ; X86-LABEL: test_int_x86_avx512_mask_permvar_df_512:
6710 ; X86-NEXT: vpermpd %zmm0, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0xf5,0x48,0x16,0xd8]
6711 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
6712 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
6713 ; X86-NEXT: vpermpd %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x16,0xd0]
6714 ; X86-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0x16,0xc0]
6715 ; X86-NEXT: vaddpd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
6716 ; X86-NEXT: vaddpd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc3]
6717 ; X86-NEXT: retl ## encoding: [0xc3]
6719 ; X64-LABEL: test_int_x86_avx512_mask_permvar_df_512:
6721 ; X64-NEXT: vpermpd %zmm0, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0xf5,0x48,0x16,0xd8]
6722 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6723 ; X64-NEXT: vpermpd %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x16,0xd0]
6724 ; X64-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0x16,0xc0]
6725 ; X64-NEXT: vaddpd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
6726 ; X64-NEXT: vaddpd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc3]
6727 ; X64-NEXT: retq ## encoding: [0xc3]
6728 %res = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
6729 %res1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
6730 %res2 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
6731 %res3 = fadd <8 x double> %res, %res1
6732 %res4 = fadd <8 x double> %res3, %res2
6733 ret <8 x double> %res4
6736 declare <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
6738 define <8 x i64>@test_int_x86_avx512_mask_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
6739 ; X86-LABEL: test_int_x86_avx512_mask_permvar_di_512:
6741 ; X86-NEXT: vpermq %zmm0, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0xf5,0x48,0x36,0xd8]
6742 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
6743 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
6744 ; X86-NEXT: vpermq %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x36,0xd0]
6745 ; X86-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0x36,0xc0]
6746 ; X86-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
6747 ; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
6748 ; X86-NEXT: retl ## encoding: [0xc3]
6750 ; X64-LABEL: test_int_x86_avx512_mask_permvar_di_512:
6752 ; X64-NEXT: vpermq %zmm0, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0xf5,0x48,0x36,0xd8]
6753 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6754 ; X64-NEXT: vpermq %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x36,0xd0]
6755 ; X64-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0x36,0xc0]
6756 ; X64-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
6757 ; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
6758 ; X64-NEXT: retq ## encoding: [0xc3]
6759 %res = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
6760 %res1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
6761 %res2 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
6762 %res3 = add <8 x i64> %res, %res1
6763 %res4 = add <8 x i64> %res3, %res2
6764 ret <8 x i64> %res4
6767 declare <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float>, <16 x i32>, <16 x float>, i16)
6769 define <16 x float>@test_int_x86_avx512_mask_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
6770 ; X86-LABEL: test_int_x86_avx512_mask_permvar_sf_512:
6772 ; X86-NEXT: vpermps %zmm0, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0x75,0x48,0x16,0xd8]
6773 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6774 ; X86-NEXT: vpermps %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x16,0xd0]
6775 ; X86-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x16,0xc0]
6776 ; X86-NEXT: vaddps %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
6777 ; X86-NEXT: vaddps %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc3]
6778 ; X86-NEXT: retl ## encoding: [0xc3]
6780 ; X64-LABEL: test_int_x86_avx512_mask_permvar_sf_512:
6782 ; X64-NEXT: vpermps %zmm0, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0x75,0x48,0x16,0xd8]
6783 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6784 ; X64-NEXT: vpermps %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x16,0xd0]
6785 ; X64-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x16,0xc0]
6786 ; X64-NEXT: vaddps %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
6787 ; X64-NEXT: vaddps %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc3]
6788 ; X64-NEXT: retq ## encoding: [0xc3]
6789 %res = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
6790 %res1 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
6791 %res2 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
6792 %res3 = fadd <16 x float> %res, %res1
6793 %res4 = fadd <16 x float> %res3, %res2
6794 ret <16 x float> %res4
6797 declare <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
6799 define <16 x i32>@test_int_x86_avx512_mask_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
6800 ; X86-LABEL: test_int_x86_avx512_mask_permvar_si_512:
6802 ; X86-NEXT: vpermd %zmm0, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0x75,0x48,0x36,0xd8]
6803 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6804 ; X86-NEXT: vpermd %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x36,0xd0]
6805 ; X86-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x36,0xc0]
6806 ; X86-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
6807 ; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
6808 ; X86-NEXT: retl ## encoding: [0xc3]
6810 ; X64-LABEL: test_int_x86_avx512_mask_permvar_si_512:
6812 ; X64-NEXT: vpermd %zmm0, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0x75,0x48,0x36,0xd8]
6813 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6814 ; X64-NEXT: vpermd %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x36,0xd0]
6815 ; X64-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x36,0xc0]
6816 ; X64-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
6817 ; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
6818 ; X64-NEXT: retq ## encoding: [0xc3]
6819 %res = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
6820 %res1 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
6821 %res2 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
6822 %res3 = add <16 x i32> %res, %res1
6823 %res4 = add <16 x i32> %res3, %res2
6824 ret <16 x i32> %res4
6827 declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
6829 define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
6830 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
6832 ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
6833 ; X86-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf3,0x75,0x48,0x25,0xda,0x21]
6834 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6835 ; X86-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x49,0x25,0xc2,0x21]
6836 ; X86-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
6837 ; X86-NEXT: retl ## encoding: [0xc3]
6839 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
6841 ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
6842 ; X64-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf3,0x75,0x48,0x25,0xda,0x21]
6843 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6844 ; X64-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x49,0x25,0xc2,0x21]
6845 ; X64-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
6846 ; X64-NEXT: retq ## encoding: [0xc3]
6847 %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
6848 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
6849 %res2 = add <16 x i32> %res, %res1
6850 ret <16 x i32> %res2
6853 declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
6855 define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
6856 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
6858 ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
6859 ; X86-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf3,0x75,0x48,0x25,0xda,0x21]
6860 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6861 ; X86-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc2,0x21]
6862 ; X86-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
6863 ; X86-NEXT: retl ## encoding: [0xc3]
6865 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
6867 ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
6868 ; X64-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf3,0x75,0x48,0x25,0xda,0x21]
6869 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6870 ; X64-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc2,0x21]
6871 ; X64-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
6872 ; X64-NEXT: retq ## encoding: [0xc3]
6873 %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
6874 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
6875 %res2 = add <16 x i32> %res, %res1
6876 ret <16 x i32> %res2
6879 declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
6881 define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
6882 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
6884 ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
6885 ; X86-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf3,0xf5,0x48,0x25,0xda,0x21]
6886 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
6887 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
6888 ; X86-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x49,0x25,0xc2,0x21]
6889 ; X86-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
6890 ; X86-NEXT: retl ## encoding: [0xc3]
6892 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
6894 ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
6895 ; X64-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf3,0xf5,0x48,0x25,0xda,0x21]
6896 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6897 ; X64-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x49,0x25,0xc2,0x21]
6898 ; X64-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
6899 ; X64-NEXT: retq ## encoding: [0xc3]
6900 %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
6901 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
6902 %res2 = add <8 x i64> %res, %res1
6903 ret <8 x i64> %res2
6906 declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
6908 define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
6909 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
6911 ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
6912 ; X86-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf3,0xf5,0x48,0x25,0xda,0x21]
6913 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
6914 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
6915 ; X86-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xc9,0x25,0xc2,0x21]
6916 ; X86-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
6917 ; X86-NEXT: retl ## encoding: [0xc3]
6919 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
6921 ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
6922 ; X64-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf3,0xf5,0x48,0x25,0xda,0x21]
6923 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6924 ; X64-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xc9,0x25,0xc2,0x21]
6925 ; X64-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
6926 ; X64-NEXT: retq ## encoding: [0xc3]
6927 %res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
6928 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
6929 %res2 = add <8 x i64> %res, %res1
6930 ret <8 x i64> %res2
declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32> @test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
; X86-NEXT: vpermi2d (%eax), %zmm0, %zmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x76,0x18]
; X86-NEXT: vpermt2d %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0x7e,0xc2]
; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovdqa64 %zmm1, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
; X64-NEXT: vpermi2d (%rdi), %zmm0, %zmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x76,0x1f]
; X64-NEXT: vpermt2d %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0x7e,0xc2]
; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%x2 = load <16 x i32>, <16 x i32>* %x2p
%res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
}
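;; Note: vpermi2* overwrites its index operand while vpermt2* overwrites a
;; table operand; with an all-ones mask either form is legal, which is why the
;; unmasked vpermi2var call above is lowered to vpermt2d.
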
declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)

define <8 x double> @test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
; X86: ## %bb.0:
; X86-NEXT: vmovapd %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xd8]
; X86-NEXT: vpermt2pd %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0xf5,0x48,0x7f,0xda]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x77,0xca]
; X86-NEXT: vaddpd %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
; X64: ## %bb.0:
; X64-NEXT: vmovapd %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xd8]
; X64-NEXT: vpermt2pd %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0xf5,0x48,0x7f,0xda]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x77,0xca]
; X64-NEXT: vaddpd %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
%res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
%res2 = fadd <8 x double> %res, %res1
ret <8 x double> %res2
}
declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)

define <16 x float> @test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
; X86: ## %bb.0:
; X86-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
; X86-NEXT: vpermt2ps %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0x75,0x48,0x7f,0xda]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x77,0xca]
; X86-NEXT: vaddps %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
; X64: ## %bb.0:
; X64-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
; X64-NEXT: vpermt2ps %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0x75,0x48,0x7f,0xda]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x77,0xca]
; X64-NEXT: vaddps %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
%res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
%res2 = fadd <16 x float> %res, %res1
ret <16 x float> %res2
}
declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64> @test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
; X86: ## %bb.0:
; X86-NEXT: vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT: vpermt2q %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0xf5,0x48,0x7e,0xda]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermi2q %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x76,0xca]
; X86-NEXT: vpaddq %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
; X64: ## %bb.0:
; X64-NEXT: vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT: vpermt2q %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0xf5,0x48,0x7e,0xda]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermi2q %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x76,0xca]
; X64-NEXT: vpaddq %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
%res2 = add <8 x i64> %res, %res1
ret <8 x i64> %res2
}
declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32> @test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm2 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd1]
; X86-NEXT: vpermt2d (%eax), %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x7e,0x10]
; X86-NEXT: vpermt2d %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x7e,0xc9]
; X86-NEXT: vpaddd %zmm1, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovdqa64 %zmm1, %zmm2 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd1]
; X64-NEXT: vpermt2d (%rdi), %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x7e,0x17]
; X64-NEXT: vpermt2d %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x7e,0xc9]
; X64-NEXT: vpaddd %zmm1, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%x2 = load <16 x i32>, <16 x i32>* %x2p
%res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x1, i16 -1)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
}
declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)

define <8 x double> @test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, double* %x2ptr, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovapd %zmm1, %zmm2 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xd1]
; X86-NEXT: vpermt2pd (%eax){1to8}, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x7f,0x10]
; X86-NEXT: vpermt2pd %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x7f,0xc9]
; X86-NEXT: vaddpd %zmm1, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovapd %zmm1, %zmm2 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xd1]
; X64-NEXT: vpermt2pd (%rdi){1to8}, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x7f,0x17]
; X64-NEXT: vpermt2pd %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x7f,0xc9]
; X64-NEXT: vaddpd %zmm1, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%x2s = load double, double* %x2ptr
%x2ins = insertelement <8 x double> undef, double %x2s, i32 0
%x2 = shufflevector <8 x double> %x2ins, <8 x double> undef, <8 x i32> zeroinitializer
%res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
%res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x1, i8 -1)
%res2 = fadd <8 x double> %res, %res1
ret <8 x double> %res2
}
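;; Note: the scalar load + insertelement + shufflevector splat above is
;; matched as an embedded broadcast, so the load is folded into vpermt2pd as
;; the (%eax){1to8} / (%rdi){1to8} memory operand.
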
declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)

define <16 x float> @test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512:
; X86: ## %bb.0:
; X86-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9]
; X86-NEXT: vpermt2ps %zmm2, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x7f,0xda]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermt2ps %zmm2, %zmm0, %zmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x7f,0xca]
; X86-NEXT: vaddps %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512:
; X64: ## %bb.0:
; X64-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9]
; X64-NEXT: vpermt2ps %zmm2, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x7f,0xda]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermt2ps %zmm2, %zmm0, %zmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x7f,0xca]
; X64-NEXT: vaddps %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
%res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
%res2 = fadd <16 x float> %res, %res1
ret <16 x float> %res2
}
declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64> @test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512:
; X86: ## %bb.0:
; X86-NEXT: vmovdqa64 %zmm1, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
; X86-NEXT: vpermt2q %zmm2, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x7e,0xda]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermt2q %zmm2, %zmm0, %zmm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x7e,0xca]
; X86-NEXT: vpaddq %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512:
; X64: ## %bb.0:
; X64-NEXT: vmovdqa64 %zmm1, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
; X64-NEXT: vpermt2q %zmm2, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x7e,0xda]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermt2q %zmm2, %zmm0, %zmm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x7e,0xca]
; X64-NEXT: vpaddq %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
%res2 = add <8 x i64> %res, %res1
ret <8 x i64> %res2
}
declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32> @test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512:
; X86: ## %bb.0:
; X86-NEXT: vmovdqa64 %zmm1, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
; X86-NEXT: vpermt2d %zmm2, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x7e,0xda]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermt2d %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x7e,0xca]
; X86-NEXT: vpaddd %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512:
; X64: ## %bb.0:
; X64-NEXT: vmovdqa64 %zmm1, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
; X64-NEXT: vpermt2d %zmm2, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x7e,0xda]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermt2d %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x7e,0xca]
; X64-NEXT: vpaddd %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
}
declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

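;; The trailing i32 operand of these arithmetic intrinsics selects the static
;; rounding mode: 4 = current (MXCSR), 8 = {rn-sae}, 9 = {rd-sae},
;; 10 = {ru-sae}, 11 = {rz-sae}, as the tests below exercise in turn.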
define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_rn:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
    <16 x float> zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_rd:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
    <16 x float> zeroinitializer, i16 -1, i32 9)
ret <16 x float> %res
}

define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_ru:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
    <16 x float> zeroinitializer, i16 -1, i32 10)
ret <16 x float> %res
}

define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_rz:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
    <16 x float> zeroinitializer, i16 -1, i32 11)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rn:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x59,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
    <16 x float> zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rd:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x59,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
    <16 x float> zeroinitializer, i16 -1, i32 9)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_ru:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x59,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
    <16 x float> zeroinitializer, i16 -1, i32 10)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rz:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x59,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
    <16 x float> zeroinitializer, i16 -1, i32 11)
ret <16 x float> %res
}
define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_vmulps_mask_rn:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_rn:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
    <16 x float> zeroinitializer, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_vmulps_mask_rd:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_rd:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
    <16 x float> zeroinitializer, i16 %mask, i32 9)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_vmulps_mask_ru:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_ru:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
    <16 x float> zeroinitializer, i16 %mask, i32 10)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_vmulps_mask_rz:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_rz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
    <16 x float> zeroinitializer, i16 %mask, i32 11)
ret <16 x float> %res
}
;; With Passthru value
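;; These are merge-masking tests: lanes cleared in %mask keep the %passthru
;; value, so the product is computed into %zmm2 (the passthru register) under
;; {%k1} and then copied back to %zmm0 for the return.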
define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_vmulps_mask_passthru_rn:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_passthru_rn:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
    <16 x float> %passthru, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_vmulps_mask_passthru_rd:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_passthru_rd:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
    <16 x float> %passthru, i16 %mask, i32 9)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_vmulps_mask_passthru_ru:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_passthru_ru:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
    <16 x float> %passthru, i16 %mask, i32 10)
ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_vmulps_mask_passthru_rz:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulps_mask_passthru_rz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
    <16 x float> %passthru, i16 %mask, i32 11)
ret <16 x float> %res
}
define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; X86-LABEL: test_vmulpd_mask_rn:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulpd_mask_rn:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
    <8 x double> zeroinitializer, i8 %mask, i32 8)
ret <8 x double> %res
}

define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; X86-LABEL: test_vmulpd_mask_rd:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulpd_mask_rd:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
    <8 x double> zeroinitializer, i8 %mask, i32 9)
ret <8 x double> %res
}

define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; X86-LABEL: test_vmulpd_mask_ru:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulpd_mask_ru:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
    <8 x double> zeroinitializer, i8 %mask, i32 10)
ret <8 x double> %res
}

define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; X86-LABEL: test_vmulpd_mask_rz:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_vmulpd_mask_rz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
    <8 x double> zeroinitializer, i8 %mask, i32 11)
ret <8 x double> %res
}
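;; Note: the <8 x double> tests take an i8 mask, so on X86 the byte argument
;; is zero-extended with movzbl before kmovw, whereas the i16 masks above are
;; loaded into %k1 directly from the stack with kmovw.
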
define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_rn_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_add_round_ps_rn_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_rd_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_add_round_ps_rd_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 9)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_ru_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_add_round_ps_ru_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 10)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_rz_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_add_round_ps_rz_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 11)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_add_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_rn_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x58,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_add_round_ps_rn_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x58,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_rd_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x58,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_add_round_ps_rd_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x58,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 9)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_ru_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x58,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_add_round_ps_ru_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x58,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 10)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_rz_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x58,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_add_round_ps_rz_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x58,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 11)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x58,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_add_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x58,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
ret <16 x float> %res
}
define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rn_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x58,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rd_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x58,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 9)
ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_ru_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x58,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 10)
ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rz_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x58,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 11)
ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

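;; With an all-ones mask (i16 -1) the masked intrinsic folds to a plain
;; vaddps with no {%k} annotation, so the unmasked tests above emit identical
;; code on both triples and share the CHECK prefix.
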
define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_sub_round_ps_rn_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5c,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_sub_round_ps_rn_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_sub_round_ps_rd_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5c,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_sub_round_ps_rd_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 9)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_sub_round_ps_ru_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5c,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_sub_round_ps_ru_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 10)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_sub_round_ps_rz_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5c,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_sub_round_ps_rz_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 11)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_sub_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsubps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5c,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_sub_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsubps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
ret <16 x float> %res
}
define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_rn_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_rd_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 9)
ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_ru_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 10)
ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_rz_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 11)
ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_div_round_ps_rn_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_div_round_ps_rn_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_div_round_ps_rd_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x5e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_div_round_ps_rd_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x5e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 9)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_div_round_ps_ru_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x5e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_div_round_ps_ru_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x5e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 10)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_div_round_ps_rz_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x5e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_div_round_ps_rz_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x5e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 11)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_div_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_div_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_div_round_ps_rn_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5e,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_div_round_ps_rn_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5e,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_div_round_ps_rd_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5e,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_div_round_ps_rd_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5e,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 9)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_div_round_ps_ru_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5e,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_div_round_ps_ru_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5e,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 10)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_div_round_ps_rz_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5e,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_div_round_ps_rz_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5e,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 11)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_div_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5e,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_div_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5e,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
ret <16 x float> %res
}
7997 define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
7998 ; CHECK-LABEL: test_mm512_div_round_ps_rn_sae:
8000 ; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5e,0xc1]
8001 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
8002 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
8003 ret <16 x float> %res
8005 define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
8006 ; CHECK-LABEL: test_mm512_div_round_ps_rd_sae:
8008 ; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x5e,0xc1]
8009 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
8010 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 9)
8011 ret <16 x float> %res
8013 define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
8014 ; CHECK-LABEL: test_mm512_div_round_ps_ru_sae:
8016 ; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x5e,0xc1]
8017 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
8018 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 10)
8019 ret <16 x float> %res
8022 define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
8023 ; CHECK-LABEL: test_mm512_div_round_ps_rz_sae:
8025 ; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x5e,0xc1]
8026 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
8027 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 11)
8028 ret <16 x float> %res
8031 define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
8032 ; CHECK-LABEL: test_mm512_div_round_ps_current:
8034 ; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x5e,0xc1]
8035 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
8036 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
8037 ret <16 x float> %res
8039 declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
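; Compress-store tests: vcompresspd/vcompressps/vpcompressq/vpcompressd pack the
; elements selected by the mask into contiguous memory at %addr. With an
; all-ones mask (i8/i16 -1) every element is stored; the kxnorw %k0, %k0, %k1
; idiom below materializes that all-ones mask register.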
define void @test_mask_compress_store_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vcompresspd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vcompresspd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
ret void
}

declare void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)

define void @test_compress_store_pd_512(i8* %addr, <8 x double> %data) {
; X86-LABEL: test_compress_store_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vcompresspd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_compress_store_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vcompresspd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 -1)
ret void
}

define void @test_mask_compress_store_ps_512(i8* %addr, <16 x float> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_ps_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vcompressps %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vcompressps %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
ret void
}

declare void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 %mask)

define void @test_compress_store_ps_512(i8* %addr, <16 x float> %data) {
; X86-LABEL: test_compress_store_ps_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vcompressps %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_compress_store_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vcompressps %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 -1)
ret void
}

define void @test_mask_compress_store_q_512(i8* %addr, <8 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_q_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpcompressq %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpcompressq %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
ret void
}

declare void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)

define void @test_compress_store_q_512(i8* %addr, <8 x i64> %data) {
; X86-LABEL: test_compress_store_q_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpcompressq %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_compress_store_q_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpcompressq %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 -1)
ret void
}

define void @test_mask_compress_store_d_512(i8* %addr, <16 x i32> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpcompressd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpcompressd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
ret void
}

declare void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 %mask)

define void @test_compress_store_d_512(i8* %addr, <16 x i32> %data) {
; X86-LABEL: test_compress_store_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpcompressd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_compress_store_d_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpcompressd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 -1)
ret void
}
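; Expand-load tests cover the inverse operation: vexpandpd/vexpandps/vpexpandq/
; vpexpandd read contiguous elements from %addr and place them into the
; destination lanes selected by the mask; {z} zeroes the unselected lanes.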
define <8 x double> @test_mask_expand_load_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vexpandpd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
ret <8 x double> %res
}

define <8 x double> @test_maskz_expand_load_pd_512(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vexpandpd (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandpd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> zeroinitializer, i8 %mask)
ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)

define <8 x double> @test_expand_load_pd_512(i8* %addr, <8 x double> %data) {
; X86-LABEL: test_expand_load_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vexpandpd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_expand_load_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 -1)
ret <8 x double> %res
}

; Make sure we don't crash if you pass 0 to the mask.
define <8 x double> @test_zero_mask_expand_load_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
; CHECK-LABEL: test_zero_mask_expand_load_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 0)
ret <8 x double> %res
}
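; With an all-zero mask the expanding load has no observable effect, so it is
; folded away entirely and only the return instruction remains.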
define <16 x float> @test_mask_expand_load_ps_512(i8* %addr, <16 x float> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_ps_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vexpandps (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
ret <16 x float> %res
}

define <16 x float> @test_maskz_expand_load_ps_512(i8* %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_ps_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vexpandps (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x88,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandps (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x88,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> zeroinitializer, i16 %mask)
ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 %mask)

define <16 x float> @test_expand_load_ps_512(i8* %addr, <16 x float> %data) {
; X86-LABEL: test_expand_load_ps_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vexpandps (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_expand_load_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vexpandps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 -1)
ret <16 x float> %res
}

define <8 x i64> @test_mask_expand_load_q_512(i8* %addr, <8 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_q_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpexpandq (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandq (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_maskz_expand_load_q_512(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_q_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpexpandq (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandq (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)

define <8 x i64> @test_expand_load_q_512(i8* %addr, <8 x i64> %data) {
; X86-LABEL: test_expand_load_q_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandq (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_expand_load_q_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandq (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 -1)
ret <8 x i64> %res
}

define <16 x i32> @test_mask_expand_load_d_512(i8* %addr, <16 x i32> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_maskz_expand_load_d_512(i8* %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandd (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x89,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x89,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 %mask)

define <16 x i32> @test_expand_load_d_512(i8* %addr, <16 x i32> %data) {
; X86-LABEL: test_expand_load_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_expand_load_d_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 -1)
ret <16 x i32> %res
}
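; vminps/vmaxps never round, so the i32 8 in the min/max tests below only
; requests {sae} (suppress all exceptions) rather than a static rounding mode;
; i32 4 uses the default environment and prints no suffix at all.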
define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_min_round_ps_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5d,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_min_round_ps_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5d,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_min_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5d,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_min_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5d,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_min_round_ps_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5d,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_min_round_ps_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5d,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_min_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vminps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5d,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_min_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vminps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5d,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_min_round_ps_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5d,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_min_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x5d,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_max_round_ps_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_max_round_ps_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_max_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_max_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_max_round_ps_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5f,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_max_round_ps_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5f,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_max_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmaxps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5f,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_max_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmaxps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5f,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_max_round_ps_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5f,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_max_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x5f,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_sqrt_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsqrtpd %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x51,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> undef, i8 -1, i32 4)
ret <8 x double> %res
}

define <8 x double> @test_mask_sqrt_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
; X86-LABEL: test_mask_sqrt_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vsqrtpd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x51,0xc8]
; X86-NEXT: vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sqrt_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtpd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x51,0xc8]
; X64-NEXT: vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> %passthru, i8 %mask, i32 4)
ret <8 x double> %res
}

define <8 x double> @test_maskz_sqrt_pd_512(<8 x double> %a0, i8 %mask) {
; X86-LABEL: test_maskz_sqrt_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vsqrtpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x51,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_sqrt_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x51,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 %mask, i32 4)
ret <8 x double> %res
}

define <8 x double> @test_sqrt_round_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_sqrt_round_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsqrtpd {rz-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x78,0x51,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> undef, i8 -1, i32 11)
ret <8 x double> %res
}

define <8 x double> @test_mask_sqrt_round_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
; X86-LABEL: test_mask_sqrt_round_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vsqrtpd {rz-sae}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x79,0x51,0xc8]
; X86-NEXT: vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sqrt_round_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtpd {rz-sae}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x79,0x51,0xc8]
; X64-NEXT: vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> %passthru, i8 %mask, i32 11)
ret <8 x double> %res
}

define <8 x double> @test_maskz_sqrt_round_pd_512(<8 x double> %a0, i8 %mask) {
; X86-LABEL: test_maskz_sqrt_round_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vsqrtpd {rz-sae}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x51,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_sqrt_round_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtpd {rz-sae}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x51,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 %mask, i32 11)
ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone

define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_sqrt_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsqrtps %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x51,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> undef, i16 -1, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mask_sqrt_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_mask_sqrt_ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsqrtps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x51,0xc8]
; X86-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sqrt_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x51,0xc8]
; X64-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> %passthru, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_maskz_sqrt_ps_512(<16 x float> %a0, i16 %mask) {
; X86-LABEL: test_maskz_sqrt_ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsqrtps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x51,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_sqrt_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x51,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_sqrt_round_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x51,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 11)
ret <16 x float> %res
}

define <16 x float> @test_mask_sqrt_round_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_mask_sqrt_round_ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x51,0xc8]
; X86-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sqrt_round_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x51,0xc8]
; X64-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> %passthru, i16 %mask, i32 11)
ret <16 x float> %res
}

define <16 x float> @test_maskz_sqrt_round_ps_512(<16 x float> %a0, i16 %mask) {
; X86-LABEL: test_maskz_sqrt_round_ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x51,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_sqrt_round_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x51,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 %mask, i32 11)
ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
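; Rotate tests: vprolvd/vprolvq and vprorvd/vprorvq rotate each element left or
; right by a per-element count taken from the second source vector, while
; vprold/vprolq/vprord/vprorq use an immediate count ($3 in these tests).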
declare <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_prolv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_d_512:
; X86: ## %bb.0:
; X86-NEXT: vprolvd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x15,0xd9]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vprolvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x15,0xd1]
; X86-NEXT: vprolvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x15,0xc1]
; X86-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_d_512:
; X64: ## %bb.0:
; X64-NEXT: vprolvd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x15,0xd9]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprolvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x15,0xd1]
; X64-NEXT: vprolvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x15,0xc1]
; X64-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
%res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
%res3 = add <16 x i32> %res, %res1
%res4 = add <16 x i32> %res3, %res2
ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_prolv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_q_512:
; X86: ## %bb.0:
; X86-NEXT: vprolvq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x15,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprolvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x15,0xd1]
; X86-NEXT: vprolvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x15,0xc1]
; X86-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_q_512:
; X64: ## %bb.0:
; X64-NEXT: vprolvq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x15,0xd9]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprolvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x15,0xd1]
; X64-NEXT: vprolvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x15,0xc1]
; X64-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
%res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
%res3 = add <8 x i64> %res, %res1
%res4 = add <8 x i64> %res3, %res2
ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_prorv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_d_512:
; X86: ## %bb.0:
; X86-NEXT: vprorvd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x14,0xd9]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vprorvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x14,0xd1]
; X86-NEXT: vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x14,0xc1]
; X86-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_d_512:
; X64: ## %bb.0:
; X64-NEXT: vprorvd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x14,0xd9]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprorvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x14,0xd1]
; X64-NEXT: vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x14,0xc1]
; X64-NEXT: vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
%res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
%res3 = add <16 x i32> %res, %res1
%res4 = add <16 x i32> %res3, %res2
ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_prorv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_q_512:
; X86: ## %bb.0:
; X86-NEXT: vprorvq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x14,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprorvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x14,0xd1]
; X86-NEXT: vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x14,0xc1]
; X86-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_q_512:
; X64: ## %bb.0:
; X64-NEXT: vprorvq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x14,0xd9]
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprorvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x14,0xd1]
; X64-NEXT: vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x14,0xc1]
; X64-NEXT: vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
%res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
%res3 = add <8 x i64> %res, %res1
%res4 = add <8 x i64> %res3, %res2
ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_prol_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prol_d_512:
; X86: ## %bb.0:
; X86-NEXT: vprold $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc8,0x03]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vprold $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc8,0x03]
; X86-NEXT: vprold $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xc8,0x03]
; X86-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prol_d_512:
; X64: ## %bb.0:
; X64-NEXT: vprold $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc8,0x03]
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprold $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc8,0x03]
; X64-NEXT: vprold $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xc8,0x03]
; X64-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 3, <16 x i32> zeroinitializer, i16 %x3)
%res2 = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 -1)
%res3 = add <16 x i32> %res, %res1
%res4 = add <16 x i32> %res3, %res2
ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_prol_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prol_q_512:
; X86: ## %bb.0:
; X86-NEXT: vprolq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc8,0x03]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprolq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc8,0x03]
; X86-NEXT: vprolq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xc8,0x03]
; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prol_q_512:
; X64: ## %bb.0:
; X64-NEXT: vprolq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc8,0x03]
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprolq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc8,0x03]
; X64-NEXT: vprolq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xc8,0x03]
; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3)
%res2 = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1)
%res3 = add <8 x i64> %res, %res1
%res4 = add <8 x i64> %res3, %res2
ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_pror_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pror_d_512:
; X86: ## %bb.0:
; X86-NEXT: vprord $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc0,0x03]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vprord $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc0,0x03]
; X86-NEXT: vprord $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xc0,0x03]
; X86-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pror_d_512:
; X64: ## %bb.0:
; X64-NEXT: vprord $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc0,0x03]
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprord $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc0,0x03]
; X64-NEXT: vprord $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xc0,0x03]
; X64-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32> %x0, i32 3, <16 x i32> zeroinitializer, i16 %x3)
%res2 = call <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 -1)
%res3 = add <16 x i32> %res, %res1
%res4 = add <16 x i32> %res3, %res2
ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pror_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pror_q_512:
; X86: ## %bb.0:
; X86-NEXT: vprorq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc0,0x03]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprorq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc0,0x03]
; X86-NEXT: vprorq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xc0,0x03]
; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pror_q_512:
; X64: ## %bb.0:
; X64-NEXT: vprorq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc0,0x03]
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprorq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc0,0x03]
; X64-NEXT: vprorq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xc0,0x03]
; X64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3)
%res2 = call <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1)
%res3 = add <8 x i64> %res, %res1
%res4 = add <8 x i64> %res3, %res2
ret <8 x i64> %res4
}
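; Scalar FMA tests: vfmadd213sd/vfmadd213ss are checked unmasked (where the
; assembler compresses the EVEX form to VEX), {k1}-masked, and with {rz-sae}
; static rounding; the fadds of the variants keep every result live.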
9014 declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
9016 define <2 x double>@test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
9017 ; X86-LABEL: test_int_x86_avx512_mask_vfmadd_sd:
9019 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
9020 ; X86-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8]
9021 ; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa9,0xda]
9022 ; X86-NEXT: ## xmm3 = (xmm1 * xmm3) + xmm2
9023 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9024 ; X86-NEXT: vmovapd %xmm0, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe0]
9025 ; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xe2]
9026 ; X86-NEXT: ## xmm4 = (xmm1 * xmm4) + xmm2
9027 ; X86-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
9028 ; X86-NEXT: vmovapd %xmm0, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe0]
9029 ; X86-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf2,0xf5,0x78,0xa9,0xe2]
9030 ; X86-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xa9,0xc2]
9031 ; X86-NEXT: vaddpd %xmm0, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc0]
9032 ; X86-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
9033 ; X86-NEXT: retl ## encoding: [0xc3]
9035 ; X64-LABEL: test_int_x86_avx512_mask_vfmadd_sd:
9037 ; X64-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8]
9038 ; X64-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa9,0xda]
9039 ; X64-NEXT: ## xmm3 = (xmm1 * xmm3) + xmm2
9040 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9041 ; X64-NEXT: vmovapd %xmm0, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe0]
9042 ; X64-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xe2]
9043 ; X64-NEXT: ## xmm4 = (xmm1 * xmm4) + xmm2
9044 ; X64-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
9045 ; X64-NEXT: vmovapd %xmm0, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe0]
9046 ; X64-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf2,0xf5,0x78,0xa9,0xe2]
9047 ; X64-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xa9,0xc2]
9048 ; X64-NEXT: vaddpd %xmm0, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc0]
9049 ; X64-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
9050 ; X64-NEXT: retq ## encoding: [0xc3]
9051 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
9052 %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
9053 %res2 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 11)
9054 %res3 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 11)
9055 %res4 = fadd <2 x double> %res, %res1
9056 %res5 = fadd <2 x double> %res2, %res3
9057 %res6 = fadd <2 x double> %res4, %res5
9058 ret <2 x double> %res6
9059 }
9061 declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
9063 define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
9064 ; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ss:
9066 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
9067 ; X86-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xd8]
9068 ; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xda]
9069 ; X86-NEXT: ## xmm3 = (xmm1 * xmm3) + xmm2
9070 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9071 ; X86-NEXT: vmovaps %xmm0, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe0]
9072 ; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm4 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xe2]
9073 ; X86-NEXT: ## xmm4 = (xmm1 * xmm4) + xmm2
9074 ; X86-NEXT: vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
9075 ; X86-NEXT: vmovaps %xmm0, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe0]
9076 ; X86-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf2,0x75,0x78,0xa9,0xe2]
9077 ; X86-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xa9,0xc2]
9078 ; X86-NEXT: vaddps %xmm0, %xmm4, %xmm0 ## encoding: [0xc5,0xd8,0x58,0xc0]
9079 ; X86-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe0,0x58,0xc0]
9080 ; X86-NEXT: retl ## encoding: [0xc3]
9082 ; X64-LABEL: test_int_x86_avx512_mask_vfmadd_ss:
9084 ; X64-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xd8]
9085 ; X64-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xda]
9086 ; X64-NEXT: ## xmm3 = (xmm1 * xmm3) + xmm2
9087 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9088 ; X64-NEXT: vmovaps %xmm0, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe0]
9089 ; X64-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm4 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xe2]
9090 ; X64-NEXT: ## xmm4 = (xmm1 * xmm4) + xmm2
9091 ; X64-NEXT: vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
9092 ; X64-NEXT: vmovaps %xmm0, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe0]
9093 ; X64-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf2,0x75,0x78,0xa9,0xe2]
9094 ; X64-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xa9,0xc2]
9095 ; X64-NEXT: vaddps %xmm0, %xmm4, %xmm0 ## encoding: [0xc5,0xd8,0x58,0xc0]
9096 ; X64-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe0,0x58,0xc0]
9097 ; X64-NEXT: retq ## encoding: [0xc3]
9098 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
9099 %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
9100 %res2 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 11)
9101 %res3 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 11)
9102 %res4 = fadd <4 x float> %res, %res1
9103 %res5 = fadd <4 x float> %res2, %res3
9104 %res6 = fadd <4 x float> %res4, %res5
9105 ret <4 x float> %res6
9106 }
9108 declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
9110 define <2 x double>@test_int_x86_avx512_maskz_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
9111 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_sd:
9113 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
9114 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9115 ; X86-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8]
9116 ; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xda]
9117 ; X86-NEXT: ## xmm3 = (xmm1 * xmm3) + xmm2
9118 ; X86-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xf9,0xa9,0xc2]
9119 ; X86-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
9120 ; X86-NEXT: retl ## encoding: [0xc3]
9122 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_sd:
9124 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9125 ; X64-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8]
9126 ; X64-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xda]
9127 ; X64-NEXT: ## xmm3 = (xmm1 * xmm3) + xmm2
9128 ; X64-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xf9,0xa9,0xc2]
9129 ; X64-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
9130 ; X64-NEXT: retq ## encoding: [0xc3]
9131 %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
9132 %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 11)
9133 %res2 = fadd <2 x double> %res, %res1
9134 ret <2 x double> %res2
9135 }
9137 declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
9139 define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
9140 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ss:
9142 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
9143 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9144 ; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2]
9145 ; X86-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2
9146 ; X86-NEXT: retl ## encoding: [0xc3]
9148 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ss:
9150 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9151 ; X64-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2]
9152 ; X64-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2
9153 ; X64-NEXT: retq ## encoding: [0xc3]
9154 %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
9155 %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 11)
9156 %res2 = fadd <4 x float> %res, %res1
9157 ret <4 x float> %res
9158 }
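; mask3 variants write the result into the third (addend) operand, so they select the 231 form of
; the instruction.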
9159 declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
9161 define <2 x double>@test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
9162 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_sd:
9164 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
9165 ; X86-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
9166 ; X86-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0xd9]
9167 ; X86-NEXT: ## xmm3 = (xmm0 * xmm1) + xmm3
9168 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9169 ; X86-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
9170 ; X86-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xe1]
9171 ; X86-NEXT: ## xmm4 = (xmm0 * xmm1) + xmm4
9172 ; X86-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
9173 ; X86-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
9174 ; X86-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xb9,0xe1]
9175 ; X86-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x79,0xb9,0xd1]
9176 ; X86-NEXT: vaddpd %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc2]
9177 ; X86-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
9178 ; X86-NEXT: retl ## encoding: [0xc3]
9180 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_sd:
9182 ; X64-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
9183 ; X64-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0xd9]
9184 ; X64-NEXT: ## xmm3 = (xmm0 * xmm1) + xmm3
9185 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9186 ; X64-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
9187 ; X64-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xe1]
9188 ; X64-NEXT: ## xmm4 = (xmm0 * xmm1) + xmm4
9189 ; X64-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
9190 ; X64-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
9191 ; X64-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xb9,0xe1]
9192 ; X64-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x79,0xb9,0xd1]
9193 ; X64-NEXT: vaddpd %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc2]
9194 ; X64-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
9195 ; X64-NEXT: retq ## encoding: [0xc3]
9196 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
9197 %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
9198 %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 11)
9199 %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 11)
9200 %res4 = fadd <2 x double> %res, %res1
9201 %res5 = fadd <2 x double> %res2, %res3
9202 %res6 = fadd <2 x double> %res4, %res5
9203 ret <2 x double> %res6
9204 }
9206 declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
9208 define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
9209 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ss:
9211 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
9212 ; X86-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
9213 ; X86-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0xd9]
9214 ; X86-NEXT: ## xmm3 = (xmm0 * xmm1) + xmm3
9215 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9216 ; X86-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
9217 ; X86-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xe1]
9218 ; X86-NEXT: ## xmm4 = (xmm0 * xmm1) + xmm4
9219 ; X86-NEXT: vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
9220 ; X86-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
9221 ; X86-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xb9,0xe1]
9222 ; X86-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x79,0xb9,0xd1]
9223 ; X86-NEXT: vaddps %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd8,0x58,0xc2]
9224 ; X86-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe0,0x58,0xc0]
9225 ; X86-NEXT: retl ## encoding: [0xc3]
9227 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ss:
9229 ; X64-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
9230 ; X64-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0xd9]
9231 ; X64-NEXT: ## xmm3 = (xmm0 * xmm1) + xmm3
9232 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9233 ; X64-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
9234 ; X64-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xe1]
9235 ; X64-NEXT: ## xmm4 = (xmm0 * xmm1) + xmm4
9236 ; X64-NEXT: vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
9237 ; X64-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
9238 ; X64-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xb9,0xe1]
9239 ; X64-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x79,0xb9,0xd1]
9240 ; X64-NEXT: vaddps %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd8,0x58,0xc2]
9241 ; X64-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe0,0x58,0xc0]
9242 ; X64-NEXT: retq ## encoding: [0xc3]
9243 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
9244 %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
9245 %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 11)
9246 %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 11)
9247 %res4 = fadd <4 x float> %res, %res1
9248 %res5 = fadd <4 x float> %res2, %res3
9249 %res6 = fadd <4 x float> %res4, %res5
9250 ret <4 x float> %res6
9251 }
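; Load folding for scalar FMA: the maskz form folds the load into a memory operand, while the
; merge-masked form keeps explicit vmovss/vmovsd loads and a register FMA.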
9253 define void @fmadd_ss_mask_memfold(float* %a, float* %b, i8 %c) {
9254 ; X86-LABEL: fmadd_ss_mask_memfold:
9256 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
9257 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
9258 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
9259 ; X86-NEXT: vmovss (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x02]
9260 ; X86-NEXT: ## xmm0 = mem[0],zero,zero,zero
9261 ; X86-NEXT: vmovss (%ecx), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x09]
9262 ; X86-NEXT: ## xmm1 = mem[0],zero,zero,zero
9263 ; X86-NEXT: vfmadd213ss %xmm0, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0xc8]
9264 ; X86-NEXT: ## xmm1 = (xmm0 * xmm1) + xmm0
9265 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9266 ; X86-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x10,0xc1]
9267 ; X86-NEXT: vmovss %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x02]
9268 ; X86-NEXT: retl ## encoding: [0xc3]
9270 ; X64-LABEL: fmadd_ss_mask_memfold:
9272 ; X64-NEXT: vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
9273 ; X64-NEXT: ## xmm0 = mem[0],zero,zero,zero
9274 ; X64-NEXT: vmovss (%rsi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x0e]
9275 ; X64-NEXT: ## xmm1 = mem[0],zero,zero,zero
9276 ; X64-NEXT: vfmadd213ss %xmm0, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0xc8]
9277 ; X64-NEXT: ## xmm1 = (xmm0 * xmm1) + xmm0
9278 ; X64-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
9279 ; X64-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x10,0xc1]
9280 ; X64-NEXT: vmovss %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
9281 ; X64-NEXT: retq ## encoding: [0xc3]
9282 %a.val = load float, float* %a
9283 %av0 = insertelement <4 x float> undef, float %a.val, i32 0
9284 %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
9285 %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
9286 %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
9288 %b.val = load float, float* %b
9289 %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
9290 %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
9291 %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
9292 %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
9294 %vr = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av, i8 %c, i32 4)
9296 %sr = extractelement <4 x float> %vr, i32 0
9297 store float %sr, float* %a
9298 ret void
9299 }
9301 define void @fmadd_ss_maskz_memfold(float* %a, float* %b, i8 %c) {
9302 ; X86-LABEL: fmadd_ss_maskz_memfold:
9304 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
9305 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
9306 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
9307 ; X86-NEXT: vmovss (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x02]
9308 ; X86-NEXT: ## xmm0 = mem[0],zero,zero,zero
9309 ; X86-NEXT: vfmadd231ss (%ecx), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0x01]
9310 ; X86-NEXT: ## xmm0 = (xmm0 * mem) + xmm0
9311 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9312 ; X86-NEXT: vmovss %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc0]
9313 ; X86-NEXT: vmovss %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x02]
9314 ; X86-NEXT: retl ## encoding: [0xc3]
9316 ; X64-LABEL: fmadd_ss_maskz_memfold:
9318 ; X64-NEXT: vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
9319 ; X64-NEXT: ## xmm0 = mem[0],zero,zero,zero
9320 ; X64-NEXT: vfmadd231ss (%rsi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0x06]
9321 ; X64-NEXT: ## xmm0 = (xmm0 * mem) + xmm0
9322 ; X64-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
9323 ; X64-NEXT: vmovss %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc0]
9324 ; X64-NEXT: vmovss %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
9325 ; X64-NEXT: retq ## encoding: [0xc3]
9326 %a.val = load float, float* %a
9327 %av0 = insertelement <4 x float> undef, float %a.val, i32 0
9328 %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
9329 %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
9330 %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
9332 %b.val = load float, float* %b
9333 %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
9334 %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
9335 %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
9336 %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
9338 %vr = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av, i8 %c, i32 4)
9340 %sr = extractelement <4 x float> %vr, i32 0
9341 store float %sr, float* %a
9342 ret void
9343 }
9345 define void @fmadd_sd_mask_memfold(double* %a, double* %b, i8 %c) {
9346 ; X86-LABEL: fmadd_sd_mask_memfold:
9348 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
9349 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
9350 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
9351 ; X86-NEXT: vmovsd (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x02]
9352 ; X86-NEXT: ## xmm0 = mem[0],zero
9353 ; X86-NEXT: vmovsd (%ecx), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x09]
9354 ; X86-NEXT: ## xmm1 = mem[0],zero
9355 ; X86-NEXT: vfmadd213sd %xmm0, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa9,0xc8]
9356 ; X86-NEXT: ## xmm1 = (xmm0 * xmm1) + xmm0
9357 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9358 ; X86-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x10,0xc1]
9359 ; X86-NEXT: vmovsd %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x02]
9360 ; X86-NEXT: retl ## encoding: [0xc3]
9362 ; X64-LABEL: fmadd_sd_mask_memfold:
9364 ; X64-NEXT: vmovsd (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
9365 ; X64-NEXT: ## xmm0 = mem[0],zero
9366 ; X64-NEXT: vmovsd (%rsi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0e]
9367 ; X64-NEXT: ## xmm1 = mem[0],zero
9368 ; X64-NEXT: vfmadd213sd %xmm0, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa9,0xc8]
9369 ; X64-NEXT: ## xmm1 = (xmm0 * xmm1) + xmm0
9370 ; X64-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
9371 ; X64-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x10,0xc1]
9372 ; X64-NEXT: vmovsd %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
9373 ; X64-NEXT: retq ## encoding: [0xc3]
9374 %a.val = load double, double* %a
9375 %av0 = insertelement <2 x double> undef, double %a.val, i32 0
9376 %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
9378 %b.val = load double, double* %b
9379 %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
9380 %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
9382 %vr = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av, i8 %c, i32 4)
9384 %sr = extractelement <2 x double> %vr, i32 0
9385 store double %sr, double* %a
9386 ret void
9387 }
9389 define void @fmadd_sd_maskz_memfold(double* %a, double* %b, i8 %c) {
9390 ; X86-LABEL: fmadd_sd_maskz_memfold:
9392 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
9393 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
9394 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
9395 ; X86-NEXT: vmovsd (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x02]
9396 ; X86-NEXT: ## xmm0 = mem[0],zero
9397 ; X86-NEXT: vfmadd231sd (%ecx), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0x01]
9398 ; X86-NEXT: ## xmm0 = (xmm0 * mem) + xmm0
9399 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9400 ; X86-NEXT: vmovsd %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc0]
9401 ; X86-NEXT: vmovsd %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x02]
9402 ; X86-NEXT: retl ## encoding: [0xc3]
9404 ; X64-LABEL: fmadd_sd_maskz_memfold:
9406 ; X64-NEXT: vmovsd (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
9407 ; X64-NEXT: ## xmm0 = mem[0],zero
9408 ; X64-NEXT: vfmadd231sd (%rsi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0x06]
9409 ; X64-NEXT: ## xmm0 = (xmm0 * mem) + xmm0
9410 ; X64-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
9411 ; X64-NEXT: vmovsd %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc0]
9412 ; X64-NEXT: vmovsd %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
9413 ; X64-NEXT: retq ## encoding: [0xc3]
9414 %a.val = load double, double* %a
9415 %av0 = insertelement <2 x double> undef, double %a.val, i32 0
9416 %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
9418 %b.val = load double, double* %b
9419 %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
9420 %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
9422 %vr = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av, i8 %c, i32 4)
9424 %sr = extractelement <2 x double> %vr, i32 0
9425 store double %sr, double* %a
9426 ret void
9427 }
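; The vfmsub/vfnmsub mask3 tests below follow the same pattern as the vfmadd ones above.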
9429 declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
9431 define <2 x double>@test_int_x86_avx512_mask3_vfmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
9432 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_sd:
9434 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
9435 ; X86-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
9436 ; X86-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xbb,0xd9]
9437 ; X86-NEXT: ## xmm3 = (xmm0 * xmm1) - xmm3
9438 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9439 ; X86-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
9440 ; X86-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xe1]
9441 ; X86-NEXT: ## xmm4 = (xmm0 * xmm1) - xmm4
9442 ; X86-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
9443 ; X86-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
9444 ; X86-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xbb,0xe1]
9445 ; X86-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x79,0xbb,0xd1]
9446 ; X86-NEXT: vaddpd %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc2]
9447 ; X86-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
9448 ; X86-NEXT: retl ## encoding: [0xc3]
9450 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_sd:
9452 ; X64-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
9453 ; X64-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xbb,0xd9]
9454 ; X64-NEXT: ## xmm3 = (xmm0 * xmm1) - xmm3
9455 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9456 ; X64-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
9457 ; X64-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xe1]
9458 ; X64-NEXT: ## xmm4 = (xmm0 * xmm1) - xmm4
9459 ; X64-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
9460 ; X64-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
9461 ; X64-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xbb,0xe1]
9462 ; X64-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x79,0xbb,0xd1]
9463 ; X64-NEXT: vaddpd %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc2]
9464 ; X64-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
9465 ; X64-NEXT: retq ## encoding: [0xc3]
9466 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
9467 %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
9468 %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 11)
9469 %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 11)
9470 %res4 = fadd <2 x double> %res, %res1
9471 %res5 = fadd <2 x double> %res2, %res3
9472 %res6 = fadd <2 x double> %res4, %res5
9473 ret <2 x double> %res6
9474 }
9476 declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
9478 define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
9479 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ss:
9481 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
9482 ; X86-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
9483 ; X86-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xbb,0xd9]
9484 ; X86-NEXT: ## xmm3 = (xmm0 * xmm1) - xmm3
9485 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9486 ; X86-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
9487 ; X86-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xe1]
9488 ; X86-NEXT: ## xmm4 = (xmm0 * xmm1) - xmm4
9489 ; X86-NEXT: vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
9490 ; X86-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
9491 ; X86-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xbb,0xe1]
9492 ; X86-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x79,0xbb,0xd1]
9493 ; X86-NEXT: vaddps %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd8,0x58,0xc2]
9494 ; X86-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe0,0x58,0xc0]
9495 ; X86-NEXT: retl ## encoding: [0xc3]
9497 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ss:
9499 ; X64-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
9500 ; X64-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xbb,0xd9]
9501 ; X64-NEXT: ## xmm3 = (xmm0 * xmm1) - xmm3
9502 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9503 ; X64-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
9504 ; X64-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xe1]
9505 ; X64-NEXT: ## xmm4 = (xmm0 * xmm1) - xmm4
9506 ; X64-NEXT: vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
9507 ; X64-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
9508 ; X64-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xbb,0xe1]
9509 ; X64-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x79,0xbb,0xd1]
9510 ; X64-NEXT: vaddps %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd8,0x58,0xc2]
9511 ; X64-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe0,0x58,0xc0]
9512 ; X64-NEXT: retq ## encoding: [0xc3]
9513 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
9514 %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
9515 %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 11)
9516 %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 11)
9517 %res4 = fadd <4 x float> %res, %res1
9518 %res5 = fadd <4 x float> %res2, %res3
9519 %res6 = fadd <4 x float> %res4, %res5
9520 ret <4 x float> %res6
9521 }
9523 declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
9525 define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
9526 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_sd:
9528 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
9529 ; X86-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
9530 ; X86-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xbf,0xd9]
9531 ; X86-NEXT: ## xmm3 = -(xmm0 * xmm1) - xmm3
9532 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9533 ; X86-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
9534 ; X86-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xe1]
9535 ; X86-NEXT: ## xmm4 = -(xmm0 * xmm1) - xmm4
9536 ; X86-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
9537 ; X86-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
9538 ; X86-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xbf,0xe1]
9539 ; X86-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x79,0xbf,0xd1]
9540 ; X86-NEXT: vaddpd %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc2]
9541 ; X86-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
9542 ; X86-NEXT: retl ## encoding: [0xc3]
9544 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_sd:
9546 ; X64-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
9547 ; X64-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xbf,0xd9]
9548 ; X64-NEXT: ## xmm3 = -(xmm0 * xmm1) - xmm3
9549 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9550 ; X64-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
9551 ; X64-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xe1]
9552 ; X64-NEXT: ## xmm4 = -(xmm0 * xmm1) - xmm4
9553 ; X64-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
9554 ; X64-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
9555 ; X64-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xbf,0xe1]
9556 ; X64-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x79,0xbf,0xd1]
9557 ; X64-NEXT: vaddpd %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc2]
9558 ; X64-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
9559 ; X64-NEXT: retq ## encoding: [0xc3]
9560 %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
9561 %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
9562 %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 11)
9563 %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 11)
9564 %res4 = fadd <2 x double> %res, %res1
9565 %res5 = fadd <2 x double> %res2, %res3
9566 %res6 = fadd <2 x double> %res4, %res5
9567 ret <2 x double> %res6
9568 }
9570 declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
9572 define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
9573 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ss:
9575 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
9576 ; X86-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
9577 ; X86-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xbf,0xd9]
9578 ; X86-NEXT: ## xmm3 = -(xmm0 * xmm1) - xmm3
9579 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9580 ; X86-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
9581 ; X86-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xe1]
9582 ; X86-NEXT: ## xmm4 = -(xmm0 * xmm1) - xmm4
9583 ; X86-NEXT: vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
9584 ; X86-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
9585 ; X86-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xbf,0xe1]
9586 ; X86-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x79,0xbf,0xd1]
9587 ; X86-NEXT: vaddps %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd8,0x58,0xc2]
9588 ; X86-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe0,0x58,0xc0]
9589 ; X86-NEXT: retl ## encoding: [0xc3]
9591 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ss:
9593 ; X64-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
9594 ; X64-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xbf,0xd9]
9595 ; X64-NEXT: ## xmm3 = -(xmm0 * xmm1) - xmm3
9596 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9597 ; X64-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
9598 ; X64-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xe1]
9599 ; X64-NEXT: ## xmm4 = -(xmm0 * xmm1) - xmm4
9600 ; X64-NEXT: vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
9601 ; X64-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
9602 ; X64-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xbf,0xe1]
9603 ; X64-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x79,0xbf,0xd1]
9604 ; X64-NEXT: vaddps %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd8,0x58,0xc2]
9605 ; X64-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe0,0x58,0xc0]
9606 ; X64-NEXT: retq ## encoding: [0xc3]
9607 %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
9608 %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
9609 %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 11)
9610 %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 11)
9611 %res4 = fadd <4 x float> %res, %res1
9612 %res5 = fadd <4 x float> %res2, %res3
9613 %res6 = fadd <4 x float> %res4, %res5
9614 ret <4 x float> %res6
9615 }
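; _rm variants: one operand is built by inserting a scalar loaded through %ptr_b, so the FMA can
; use a memory operand.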
9617 define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, float *%ptr_b ,i8 %x3,i32 %x4) {
9618 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_rm:
9620 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
9621 ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ## encoding: [0x8a,0x4c,0x24,0x08]
9622 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
9623 ; X86-NEXT: vfmadd231ss (%eax), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0x08]
9624 ; X86-NEXT: ## xmm1 = (xmm0 * mem) + xmm1
9625 ; X86-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc1]
9626 ; X86-NEXT: retl ## encoding: [0xc3]
9628 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_rm:
9630 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
9631 ; X64-NEXT: vfmadd231ss (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0x0f]
9632 ; X64-NEXT: ## xmm1 = (xmm0 * mem) + xmm1
9633 ; X64-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc1]
9634 ; X64-NEXT: retq ## encoding: [0xc3]
9635 %q = load float, float* %ptr_b
9636 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
9637 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %vecinit.i, <4 x float> %x1, i8 %x3, i32 4)
9638 ret < 4 x float> %res
9639 }
9641 define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1,float *%ptr_b ,i8 %x3,i32 %x4) {
9642 ; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ss_rm:
9644 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
9645 ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ## encoding: [0x8a,0x4c,0x24,0x08]
9646 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
9647 ; X86-NEXT: vfmadd132ss (%eax), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x99,0x00]
9648 ; X86-NEXT: ## xmm0 = (xmm0 * mem) + xmm1
9649 ; X86-NEXT: retl ## encoding: [0xc3]
9651 ; X64-LABEL: test_int_x86_avx512_mask_vfmadd_ss_rm:
9653 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
9654 ; X64-NEXT: vfmadd132ss (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x99,0x07]
9655 ; X64-NEXT: ## xmm0 = (xmm0 * mem) + xmm1
9656 ; X64-NEXT: retq ## encoding: [0xc3]
9657 %q = load float, float* %ptr_b
9658 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
9659 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0,<4 x float> %vecinit.i, <4 x float> %x1, i8 %x3, i32 4)
9660 ret < 4 x float> %res
9661 }
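; With a constant zero mask, the maskz FMA folds away completely; only the zeroing of element 0 of
; the result remains.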
9664 define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1,float *%ptr_b ,i8 %x3,i32 %x4) {
9665 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ss_rm:
9667 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
9668 ; CHECK-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
9669 ; CHECK-NEXT: ## xmm0 = xmm1[0],xmm0[1,2,3]
9670 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
9671 %q = load float, float* %ptr_b
9672 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
9673 %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %vecinit.i, i8 0, i32 4)
9674 ret < 4 x float> %res
9675 }
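; Truncating vector move (vpmovqd) with unmasked, merge-masked, and zero-masked forms.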
9677 declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8)
9679 define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
9680 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qd_512:
9682 ; X86-NEXT: vpmovqd %zmm0, %ymm2 ## encoding: [0x62,0xf2,0x7e,0x48,0x35,0xc2]
9683 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
9684 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9685 ; X86-NEXT: vpmovqd %zmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x49,0x35,0xc1]
9686 ; X86-NEXT: vpmovqd %zmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xc9,0x35,0xc0]
9687 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0xc5,0xf5,0xfe,0xc0]
9688 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0xc5,0xed,0xfe,0xc0]
9689 ; X86-NEXT: retl ## encoding: [0xc3]
9691 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qd_512:
9693 ; X64-NEXT: vpmovqd %zmm0, %ymm2 ## encoding: [0x62,0xf2,0x7e,0x48,0x35,0xc2]
9694 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9695 ; X64-NEXT: vpmovqd %zmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x49,0x35,0xc1]
9696 ; X64-NEXT: vpmovqd %zmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xc9,0x35,0xc0]
9697 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0xc5,0xf5,0xfe,0xc0]
9698 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0xc5,0xed,0xfe,0xc0]
9699 ; X64-NEXT: retq ## encoding: [0xc3]
9700 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
9701 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
9702 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
9703 %res3 = add <8 x i32> %res0, %res1
9704 %res4 = add <8 x i32> %res3, %res2
9705 ret <8 x i32> %res4
9706 }
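; Integer-to-float conversions with a rounding operand: 4 is the current-direction default, while
; 8 emits {rn-sae}.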
9708 declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)
9710 define <16 x float> @test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
9711 ; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512:
9713 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9714 ; X86-NEXT: vcvtdq2ps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5b,0xc8]
9715 ; X86-NEXT: vcvtdq2ps {rn-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5b,0xc0]
9716 ; X86-NEXT: vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
9717 ; X86-NEXT: retl ## encoding: [0xc3]
9719 ; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512:
9721 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9722 ; X64-NEXT: vcvtdq2ps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5b,0xc8]
9723 ; X64-NEXT: vcvtdq2ps {rn-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5b,0xc0]
9724 ; X64-NEXT: vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
9725 ; X64-NEXT: retq ## encoding: [0xc3]
9726 %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
9727 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 8)
9728 %res2 = fadd <16 x float> %res, %res1
9729 ret <16 x float> %res2
9730 }
9732 declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)
9734 define <16 x float> @test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
9735 ; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512:
9737 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9738 ; X86-NEXT: vcvtudq2ps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7f,0x49,0x7a,0xc8]
9739 ; X86-NEXT: vcvtudq2ps {rn-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7f,0x18,0x7a,0xc0]
9740 ; X86-NEXT: vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
9741 ; X86-NEXT: retl ## encoding: [0xc3]
9743 ; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512:
9745 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9746 ; X64-NEXT: vcvtudq2ps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7f,0x49,0x7a,0xc8]
9747 ; X64-NEXT: vcvtudq2ps {rn-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7f,0x18,0x7a,0xc0]
9748 ; X64-NEXT: vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
9749 ; X64-NEXT: retq ## encoding: [0xc3]
9750 %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
9751 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 8)
9752 %res2 = fadd <16 x float> %res, %res1
9753 ret <16 x float> %res2
9754 }
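; compress/expand tests: with an all-ones mask and an undef passthrough the operation is an
; identity, so only a return is emitted.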
9756 define <8 x double> @test_mask_compress_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) {
9757 ; X86-LABEL: test_mask_compress_pd_512:
9759 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
9760 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9761 ; X86-NEXT: vcompresspd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0xc1]
9762 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
9763 ; X86-NEXT: retl ## encoding: [0xc3]
9765 ; X64-LABEL: test_mask_compress_pd_512:
9767 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9768 ; X64-NEXT: vcompresspd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0xc1]
9769 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
9770 ; X64-NEXT: retq ## encoding: [0xc3]
9771 %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask)
9772 ret <8 x double> %res
9773 }
9775 define <8 x double> @test_maskz_compress_pd_512(<8 x double> %data, i8 %mask) {
9776 ; X86-LABEL: test_maskz_compress_pd_512:
9778 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
9779 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9780 ; X86-NEXT: vcompresspd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8a,0xc0]
9781 ; X86-NEXT: retl ## encoding: [0xc3]
9783 ; X64-LABEL: test_maskz_compress_pd_512:
9785 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9786 ; X64-NEXT: vcompresspd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8a,0xc0]
9787 ; X64-NEXT: retq ## encoding: [0xc3]
9788 %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
9789 ret <8 x double> %res
9790 }
9792 define <8 x double> @test_compress_pd_512(<8 x double> %data) {
9793 ; CHECK-LABEL: test_compress_pd_512:
9795 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
9796 %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> undef, i8 -1)
9797 ret <8 x double> %res
9798 }
9800 declare <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
9802 define <16 x float> @test_mask_compress_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) {
9803 ; X86-LABEL: test_mask_compress_ps_512:
9805 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9806 ; X86-NEXT: vcompressps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0xc1]
9807 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
9808 ; X86-NEXT: retl ## encoding: [0xc3]
9810 ; X64-LABEL: test_mask_compress_ps_512:
9812 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9813 ; X64-NEXT: vcompressps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0xc1]
9814 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
9815 ; X64-NEXT: retq ## encoding: [0xc3]
9816 %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask)
9817 ret <16 x float> %res
9818 }
9820 define <16 x float> @test_maskz_compress_ps_512(<16 x float> %data, i16 %mask) {
9821 ; X86-LABEL: test_maskz_compress_ps_512:
9823 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9824 ; X86-NEXT: vcompressps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x8a,0xc0]
9825 ; X86-NEXT: retl ## encoding: [0xc3]
9827 ; X64-LABEL: test_maskz_compress_ps_512:
9829 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9830 ; X64-NEXT: vcompressps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x8a,0xc0]
9831 ; X64-NEXT: retq ## encoding: [0xc3]
9832 %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask)
9833 ret <16 x float> %res
9834 }
9836 define <16 x float> @test_compress_ps_512(<16 x float> %data) {
9837 ; CHECK-LABEL: test_compress_ps_512:
9839 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
9840 %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> undef, i16 -1)
9841 ret <16 x float> %res
9842 }
9844 declare <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask)
9846 define <8 x i64> @test_mask_compress_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) {
9847 ; X86-LABEL: test_mask_compress_q_512:
9849 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
9850 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9851 ; X86-NEXT: vpcompressq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0xc1]
9852 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
9853 ; X86-NEXT: retl ## encoding: [0xc3]
9855 ; X64-LABEL: test_mask_compress_q_512:
9857 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9858 ; X64-NEXT: vpcompressq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0xc1]
9859 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
9860 ; X64-NEXT: retq ## encoding: [0xc3]
9861 %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask)
9862 ret <8 x i64> %res
9863 }
9865 define <8 x i64> @test_maskz_compress_q_512(<8 x i64> %data, i8 %mask) {
9866 ; X86-LABEL: test_maskz_compress_q_512:
9868 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
9869 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9870 ; X86-NEXT: vpcompressq %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8b,0xc0]
9871 ; X86-NEXT: retl ## encoding: [0xc3]
9873 ; X64-LABEL: test_maskz_compress_q_512:
9875 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9876 ; X64-NEXT: vpcompressq %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8b,0xc0]
9877 ; X64-NEXT: retq ## encoding: [0xc3]
9878 %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask)
9879 ret <8 x i64> %res
9880 }
9882 define <8 x i64> @test_compress_q_512(<8 x i64> %data) {
9883 ; CHECK-LABEL: test_compress_q_512:
9885 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
9886 %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1)
9887 ret <8 x i64> %res
9888 }
9890 declare <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask)
9892 define <16 x i32> @test_mask_compress_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) {
9893 ; X86-LABEL: test_mask_compress_d_512:
9895 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9896 ; X86-NEXT: vpcompressd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0xc1]
9897 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
9898 ; X86-NEXT: retl ## encoding: [0xc3]
9900 ; X64-LABEL: test_mask_compress_d_512:
9902 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9903 ; X64-NEXT: vpcompressd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0xc1]
9904 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
9905 ; X64-NEXT: retq ## encoding: [0xc3]
9906 %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask)
9907 ret <16 x i32> %res
9908 }
9910 define <16 x i32> @test_maskz_compress_d_512(<16 x i32> %data, i16 %mask) {
9911 ; X86-LABEL: test_maskz_compress_d_512:
9913 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9914 ; X86-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x8b,0xc0]
9915 ; X86-NEXT: retl ## encoding: [0xc3]
9917 ; X64-LABEL: test_maskz_compress_d_512:
9919 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9920 ; X64-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x8b,0xc0]
9921 ; X64-NEXT: retq ## encoding: [0xc3]
9922 %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask)
9923 ret <16 x i32> %res
9924 }
9926 define <16 x i32> @test_compress_d_512(<16 x i32> %data) {
9927 ; CHECK-LABEL: test_compress_d_512:
9929 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
9930 %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1)
9931 ret <16 x i32> %res
9932 }
9934 declare <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask)
9936 define <8 x double> @test_expand_pd_512(<8 x double> %data) {
9937 ; CHECK-LABEL: test_expand_pd_512:
9939 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
9940 %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> undef, i8 -1)
9941 ret <8 x double> %res
9942 }
9944 define <8 x double> @test_mask_expand_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) {
9945 ; X86-LABEL: test_mask_expand_pd_512:
9947 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
9948 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
9949 ; X86-NEXT: vexpandpd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0xc8]
9950 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
9951 ; X86-NEXT: retl ## encoding: [0xc3]
9953 ; X64-LABEL: test_mask_expand_pd_512:
9955 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
9956 ; X64-NEXT: vexpandpd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0xc8]
9957 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
9958 ; X64-NEXT: retq ## encoding: [0xc3]
9959 %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask)
9960 ret <8 x double> %res
9961 }
define <8 x double> @test_maskz_expand_pd_512(<8 x double> %data, i8 %mask) {
; X86-LABEL: test_maskz_expand_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vexpandpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vexpandpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)

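; The <16 x float> variant carries an i16 mask, so 32-bit targets load it with kmovw directly instead of going through movzbl.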
define <16 x float> @test_expand_ps_512(<16 x float> %data) {
; CHECK-LABEL: test_expand_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> undef, i16 -1)
  ret <16 x float> %res
}

define <16 x float> @test_mask_expand_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_mask_expand_ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vexpandps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0xc8]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vexpandps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0xc8]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_maskz_expand_ps_512(<16 x float> %data, i16 %mask) {
; X86-LABEL: test_maskz_expand_ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vexpandps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x88,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vexpandps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x88,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask)

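; Integer expands: vpexpandq takes an i8 mask, so the 32-bit path again needs movzbl before kmovw.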
define <8 x i64> @test_expand_q_512(<8 x i64> %data) {
; CHECK-LABEL: test_expand_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_expand_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_mask_expand_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpexpandq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0xc8]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpexpandq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0xc8]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_maskz_expand_q_512(<8 x i64> %data, i8 %mask) {
; X86-LABEL: test_maskz_expand_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask)

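; vpexpandd mirrors vexpandps: the i16 mask is loaded straight into %k1 on 32-bit targets.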
define <16 x i32> @test_expand_d_512(<16 x i32> %data) {
; CHECK-LABEL: test_expand_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_expand_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) {
; X86-LABEL: test_mask_expand_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0xc8]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpexpandd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0xc8]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_maskz_expand_d_512(<16 x i32> %data, i16 %mask) {
; X86-LABEL: test_maskz_expand_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x89,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x89,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask)