1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
5 define <2 x double> @test_mask_compress_pd_128(<2 x double> %data, <2 x double> %passthru, i8 %mask) {
6 ; X86-LABEL: test_mask_compress_pd_128:
8 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10 ; X86-NEXT: vcompresspd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0xc1]
11 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
12 ; X86-NEXT: retl # encoding: [0xc3]
14 ; X64-LABEL: test_mask_compress_pd_128:
16 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
17 ; X64-NEXT: vcompresspd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0xc1]
18 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
19 ; X64-NEXT: retq # encoding: [0xc3]
20 %1 = bitcast i8 %mask to <8 x i1>
21 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
22 %2 = call <2 x double> @llvm.x86.avx512.mask.compress.v2f64(<2 x double> %data, <2 x double> %passthru, <2 x i1> %extract)
26 define <2 x double> @test_maskz_compress_pd_128(<2 x double> %data, i8 %mask) {
27 ; X86-LABEL: test_maskz_compress_pd_128:
29 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
30 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
31 ; X86-NEXT: vcompresspd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8a,0xc0]
32 ; X86-NEXT: retl # encoding: [0xc3]
34 ; X64-LABEL: test_maskz_compress_pd_128:
36 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
37 ; X64-NEXT: vcompresspd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8a,0xc0]
38 ; X64-NEXT: retq # encoding: [0xc3]
39 %1 = bitcast i8 %mask to <8 x i1>
40 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
41 %2 = call <2 x double> @llvm.x86.avx512.mask.compress.v2f64(<2 x double> %data, <2 x double> zeroinitializer, <2 x i1> %extract)
45 define <2 x double> @test_compress_pd_128(<2 x double> %data) {
46 ; CHECK-LABEL: test_compress_pd_128:
48 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
49 %1 = call <2 x double> @llvm.x86.avx512.mask.compress.v2f64(<2 x double> %data, <2 x double> undef, <2 x i1> <i1 true, i1 true>)
53 define <4 x float> @test_mask_compress_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) {
54 ; X86-LABEL: test_mask_compress_ps_128:
56 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
57 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
58 ; X86-NEXT: vcompressps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0xc1]
59 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
60 ; X86-NEXT: retl # encoding: [0xc3]
62 ; X64-LABEL: test_mask_compress_ps_128:
64 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
65 ; X64-NEXT: vcompressps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0xc1]
66 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
67 ; X64-NEXT: retq # encoding: [0xc3]
68 %1 = bitcast i8 %mask to <8 x i1>
69 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
70 %2 = call <4 x float> @llvm.x86.avx512.mask.compress.v4f32(<4 x float> %data, <4 x float> %passthru, <4 x i1> %extract)
74 define <4 x float> @test_maskz_compress_ps_128(<4 x float> %data, i8 %mask) {
75 ; X86-LABEL: test_maskz_compress_ps_128:
77 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
78 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
79 ; X86-NEXT: vcompressps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8a,0xc0]
80 ; X86-NEXT: retl # encoding: [0xc3]
82 ; X64-LABEL: test_maskz_compress_ps_128:
84 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
85 ; X64-NEXT: vcompressps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8a,0xc0]
86 ; X64-NEXT: retq # encoding: [0xc3]
87 %1 = bitcast i8 %mask to <8 x i1>
88 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
89 %2 = call <4 x float> @llvm.x86.avx512.mask.compress.v4f32(<4 x float> %data, <4 x float> zeroinitializer, <4 x i1> %extract)
93 define <4 x float> @test_compress_ps_128(<4 x float> %data) {
94 ; CHECK-LABEL: test_compress_ps_128:
96 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
97 %1 = call <4 x float> @llvm.x86.avx512.mask.compress.v4f32(<4 x float> %data, <4 x float> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
101 define <2 x i64> @test_mask_compress_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) {
102 ; X86-LABEL: test_mask_compress_q_128:
104 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
105 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
106 ; X86-NEXT: vpcompressq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0xc1]
107 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
108 ; X86-NEXT: retl # encoding: [0xc3]
110 ; X64-LABEL: test_mask_compress_q_128:
112 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
113 ; X64-NEXT: vpcompressq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0xc1]
114 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
115 ; X64-NEXT: retq # encoding: [0xc3]
116 %1 = bitcast i8 %mask to <8 x i1>
117 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
118 %2 = call <2 x i64> @llvm.x86.avx512.mask.compress.v2i64(<2 x i64> %data, <2 x i64> %passthru, <2 x i1> %extract)
122 define <2 x i64> @test_maskz_compress_q_128(<2 x i64> %data, i8 %mask) {
123 ; X86-LABEL: test_maskz_compress_q_128:
125 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
126 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
127 ; X86-NEXT: vpcompressq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8b,0xc0]
128 ; X86-NEXT: retl # encoding: [0xc3]
130 ; X64-LABEL: test_maskz_compress_q_128:
132 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
133 ; X64-NEXT: vpcompressq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8b,0xc0]
134 ; X64-NEXT: retq # encoding: [0xc3]
135 %1 = bitcast i8 %mask to <8 x i1>
136 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
137 %2 = call <2 x i64> @llvm.x86.avx512.mask.compress.v2i64(<2 x i64> %data, <2 x i64> zeroinitializer, <2 x i1> %extract)
141 define <2 x i64> @test_compress_q_128(<2 x i64> %data) {
142 ; CHECK-LABEL: test_compress_q_128:
144 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
145 %1 = call <2 x i64> @llvm.x86.avx512.mask.compress.v2i64(<2 x i64> %data, <2 x i64> undef, <2 x i1> <i1 true, i1 true>)
149 define <4 x i32> @test_mask_compress_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) {
150 ; X86-LABEL: test_mask_compress_d_128:
152 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
153 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
154 ; X86-NEXT: vpcompressd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0xc1]
155 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
156 ; X86-NEXT: retl # encoding: [0xc3]
158 ; X64-LABEL: test_mask_compress_d_128:
160 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
161 ; X64-NEXT: vpcompressd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0xc1]
162 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
163 ; X64-NEXT: retq # encoding: [0xc3]
164 %1 = bitcast i8 %mask to <8 x i1>
165 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
166 %2 = call <4 x i32> @llvm.x86.avx512.mask.compress.v4i32(<4 x i32> %data, <4 x i32> %passthru, <4 x i1> %extract)
170 define <4 x i32> @test_maskz_compress_d_128(<4 x i32> %data, i8 %mask) {
171 ; X86-LABEL: test_maskz_compress_d_128:
173 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
174 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
175 ; X86-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
176 ; X86-NEXT: retl # encoding: [0xc3]
178 ; X64-LABEL: test_maskz_compress_d_128:
180 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
181 ; X64-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
182 ; X64-NEXT: retq # encoding: [0xc3]
183 %1 = bitcast i8 %mask to <8 x i1>
184 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
185 %2 = call <4 x i32> @llvm.x86.avx512.mask.compress.v4i32(<4 x i32> %data, <4 x i32> zeroinitializer, <4 x i1> %extract)
189 define <4 x i32> @test_compress_d_128(<4 x i32> %data) {
190 ; CHECK-LABEL: test_compress_d_128:
192 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
193 %1 = call <4 x i32> @llvm.x86.avx512.mask.compress.v4i32(<4 x i32> %data, <4 x i32> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
197 define <2 x double> @test_expand_pd_128(<2 x double> %data) {
198 ; CHECK-LABEL: test_expand_pd_128:
200 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
201 %1 = call <2 x double> @llvm.x86.avx512.mask.expand.v2f64(<2 x double> %data, <2 x double> undef, <2 x i1> <i1 true, i1 true>)
205 define <2 x double> @test_mask_expand_pd_128(<2 x double> %data, <2 x double> %passthru, i8 %mask) {
206 ; X86-LABEL: test_mask_expand_pd_128:
208 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
209 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
210 ; X86-NEXT: vexpandpd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0xc8]
211 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
212 ; X86-NEXT: retl # encoding: [0xc3]
214 ; X64-LABEL: test_mask_expand_pd_128:
216 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
217 ; X64-NEXT: vexpandpd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0xc8]
218 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
219 ; X64-NEXT: retq # encoding: [0xc3]
220 %1 = bitcast i8 %mask to <8 x i1>
221 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
222 %2 = call <2 x double> @llvm.x86.avx512.mask.expand.v2f64(<2 x double> %data, <2 x double> %passthru, <2 x i1> %extract)
226 define <2 x double> @test_maskz_expand_pd_128(<2 x double> %data, i8 %mask) {
227 ; X86-LABEL: test_maskz_expand_pd_128:
229 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
230 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
231 ; X86-NEXT: vexpandpd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0xc0]
232 ; X86-NEXT: retl # encoding: [0xc3]
234 ; X64-LABEL: test_maskz_expand_pd_128:
236 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
237 ; X64-NEXT: vexpandpd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0xc0]
238 ; X64-NEXT: retq # encoding: [0xc3]
239 %1 = bitcast i8 %mask to <8 x i1>
240 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
241 %2 = call <2 x double> @llvm.x86.avx512.mask.expand.v2f64(<2 x double> %data, <2 x double> zeroinitializer, <2 x i1> %extract)
245 define <4 x float> @test_expand_ps_128(<4 x float> %data) {
246 ; CHECK-LABEL: test_expand_ps_128:
248 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
249 %1 = call <4 x float> @llvm.x86.avx512.mask.expand.v4f32(<4 x float> %data, <4 x float> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
253 define <4 x float> @test_mask_expand_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) {
254 ; X86-LABEL: test_mask_expand_ps_128:
256 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
257 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
258 ; X86-NEXT: vexpandps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0xc8]
259 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
260 ; X86-NEXT: retl # encoding: [0xc3]
262 ; X64-LABEL: test_mask_expand_ps_128:
264 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
265 ; X64-NEXT: vexpandps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0xc8]
266 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
267 ; X64-NEXT: retq # encoding: [0xc3]
268 %1 = bitcast i8 %mask to <8 x i1>
269 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
270 %2 = call <4 x float> @llvm.x86.avx512.mask.expand.v4f32(<4 x float> %data, <4 x float> %passthru, <4 x i1> %extract)
274 define <4 x float> @test_maskz_expand_ps_128(<4 x float> %data, i8 %mask) {
275 ; X86-LABEL: test_maskz_expand_ps_128:
277 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
278 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
279 ; X86-NEXT: vexpandps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0xc0]
280 ; X86-NEXT: retl # encoding: [0xc3]
282 ; X64-LABEL: test_maskz_expand_ps_128:
284 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
285 ; X64-NEXT: vexpandps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0xc0]
286 ; X64-NEXT: retq # encoding: [0xc3]
287 %1 = bitcast i8 %mask to <8 x i1>
288 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
289 %2 = call <4 x float> @llvm.x86.avx512.mask.expand.v4f32(<4 x float> %data, <4 x float> zeroinitializer, <4 x i1> %extract)
293 define <2 x i64> @test_expand_q_128(<2 x i64> %data) {
294 ; CHECK-LABEL: test_expand_q_128:
296 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
297 %1 = call <2 x i64> @llvm.x86.avx512.mask.expand.v2i64(<2 x i64> %data, <2 x i64> undef, <2 x i1> <i1 true, i1 true>)
301 define <2 x i64> @test_mask_expand_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) {
302 ; X86-LABEL: test_mask_expand_q_128:
304 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
305 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
306 ; X86-NEXT: vpexpandq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0xc8]
307 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
308 ; X86-NEXT: retl # encoding: [0xc3]
310 ; X64-LABEL: test_mask_expand_q_128:
312 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
313 ; X64-NEXT: vpexpandq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0xc8]
314 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
315 ; X64-NEXT: retq # encoding: [0xc3]
316 %1 = bitcast i8 %mask to <8 x i1>
317 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
318 %2 = call <2 x i64> @llvm.x86.avx512.mask.expand.v2i64(<2 x i64> %data, <2 x i64> %passthru, <2 x i1> %extract)
322 define <2 x i64> @test_maskz_expand_q_128(<2 x i64> %data, i8 %mask) {
323 ; X86-LABEL: test_maskz_expand_q_128:
325 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
326 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
327 ; X86-NEXT: vpexpandq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0xc0]
328 ; X86-NEXT: retl # encoding: [0xc3]
330 ; X64-LABEL: test_maskz_expand_q_128:
332 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
333 ; X64-NEXT: vpexpandq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0xc0]
334 ; X64-NEXT: retq # encoding: [0xc3]
335 %1 = bitcast i8 %mask to <8 x i1>
336 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
337 %2 = call <2 x i64> @llvm.x86.avx512.mask.expand.v2i64(<2 x i64> %data, <2 x i64> zeroinitializer, <2 x i1> %extract)
341 define <4 x i32> @test_expand_d_128(<4 x i32> %data) {
342 ; CHECK-LABEL: test_expand_d_128:
344 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
345 %1 = call <4 x i32> @llvm.x86.avx512.mask.expand.v4i32(<4 x i32> %data, <4 x i32> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
349 define <4 x i32> @test_mask_expand_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) {
350 ; X86-LABEL: test_mask_expand_d_128:
352 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
353 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
354 ; X86-NEXT: vpexpandd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0xc8]
355 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
356 ; X86-NEXT: retl # encoding: [0xc3]
358 ; X64-LABEL: test_mask_expand_d_128:
360 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
361 ; X64-NEXT: vpexpandd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0xc8]
362 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
363 ; X64-NEXT: retq # encoding: [0xc3]
364 %1 = bitcast i8 %mask to <8 x i1>
365 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
366 %2 = call <4 x i32> @llvm.x86.avx512.mask.expand.v4i32(<4 x i32> %data, <4 x i32> %passthru, <4 x i1> %extract)
370 define <4 x i32> @test_maskz_expand_d_128(<4 x i32> %data, i8 %mask) {
371 ; X86-LABEL: test_maskz_expand_d_128:
373 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
374 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
375 ; X86-NEXT: vpexpandd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0]
376 ; X86-NEXT: retl # encoding: [0xc3]
378 ; X64-LABEL: test_maskz_expand_d_128:
380 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
381 ; X64-NEXT: vpexpandd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0]
382 ; X64-NEXT: retq # encoding: [0xc3]
383 %1 = bitcast i8 %mask to <8 x i1>
384 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
385 %2 = call <4 x i32> @llvm.x86.avx512.mask.expand.v4i32(<4 x i32> %data, <4 x i32> zeroinitializer, <4 x i1> %extract)
389 define <4 x double> @test_mask_compress_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) {
390 ; X86-LABEL: test_mask_compress_pd_256:
392 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
393 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
394 ; X86-NEXT: vcompresspd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1]
395 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
396 ; X86-NEXT: retl # encoding: [0xc3]
398 ; X64-LABEL: test_mask_compress_pd_256:
400 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
401 ; X64-NEXT: vcompresspd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1]
402 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
403 ; X64-NEXT: retq # encoding: [0xc3]
404 %1 = bitcast i8 %mask to <8 x i1>
405 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
406 %2 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> %data, <4 x double> %passthru, <4 x i1> %extract)
410 define <4 x double> @test_maskz_compress_pd_256(<4 x double> %data, i8 %mask) {
411 ; X86-LABEL: test_maskz_compress_pd_256:
413 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
414 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
415 ; X86-NEXT: vcompresspd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8a,0xc0]
416 ; X86-NEXT: retl # encoding: [0xc3]
418 ; X64-LABEL: test_maskz_compress_pd_256:
420 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
421 ; X64-NEXT: vcompresspd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8a,0xc0]
422 ; X64-NEXT: retq # encoding: [0xc3]
423 %1 = bitcast i8 %mask to <8 x i1>
424 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
425 %2 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> %data, <4 x double> zeroinitializer, <4 x i1> %extract)
429 define <4 x double> @test_compress_pd_256(<4 x double> %data) {
430 ; CHECK-LABEL: test_compress_pd_256:
432 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
433 %1 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> %data, <4 x double> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
437 define <8 x float> @test_mask_compress_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) {
438 ; X86-LABEL: test_mask_compress_ps_256:
440 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
441 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
442 ; X86-NEXT: vcompressps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0xc1]
443 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
444 ; X86-NEXT: retl # encoding: [0xc3]
446 ; X64-LABEL: test_mask_compress_ps_256:
448 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
449 ; X64-NEXT: vcompressps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0xc1]
450 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
451 ; X64-NEXT: retq # encoding: [0xc3]
452 %1 = bitcast i8 %mask to <8 x i1>
453 %2 = call <8 x float> @llvm.x86.avx512.mask.compress.v8f32(<8 x float> %data, <8 x float> %passthru, <8 x i1> %1)
457 define <8 x float> @test_maskz_compress_ps_256(<8 x float> %data, i8 %mask) {
458 ; X86-LABEL: test_maskz_compress_ps_256:
460 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
461 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
462 ; X86-NEXT: vcompressps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8a,0xc0]
463 ; X86-NEXT: retl # encoding: [0xc3]
465 ; X64-LABEL: test_maskz_compress_ps_256:
467 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
468 ; X64-NEXT: vcompressps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8a,0xc0]
469 ; X64-NEXT: retq # encoding: [0xc3]
470 %1 = bitcast i8 %mask to <8 x i1>
471 %2 = call <8 x float> @llvm.x86.avx512.mask.compress.v8f32(<8 x float> %data, <8 x float> zeroinitializer, <8 x i1> %1)
475 define <8 x float> @test_compress_ps_256(<8 x float> %data) {
476 ; CHECK-LABEL: test_compress_ps_256:
478 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
479 %1 = call <8 x float> @llvm.x86.avx512.mask.compress.v8f32(<8 x float> %data, <8 x float> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
483 define <4 x i64> @test_mask_compress_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) {
484 ; X86-LABEL: test_mask_compress_q_256:
486 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
487 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
488 ; X86-NEXT: vpcompressq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0xc1]
489 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
490 ; X86-NEXT: retl # encoding: [0xc3]
492 ; X64-LABEL: test_mask_compress_q_256:
494 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
495 ; X64-NEXT: vpcompressq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0xc1]
496 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
497 ; X64-NEXT: retq # encoding: [0xc3]
498 %1 = bitcast i8 %mask to <8 x i1>
499 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
500 %2 = call <4 x i64> @llvm.x86.avx512.mask.compress.v4i64(<4 x i64> %data, <4 x i64> %passthru, <4 x i1> %extract)
504 define <4 x i64> @test_maskz_compress_q_256(<4 x i64> %data, i8 %mask) {
505 ; X86-LABEL: test_maskz_compress_q_256:
507 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
508 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
509 ; X86-NEXT: vpcompressq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8b,0xc0]
510 ; X86-NEXT: retl # encoding: [0xc3]
512 ; X64-LABEL: test_maskz_compress_q_256:
514 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
515 ; X64-NEXT: vpcompressq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8b,0xc0]
516 ; X64-NEXT: retq # encoding: [0xc3]
517 %1 = bitcast i8 %mask to <8 x i1>
518 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
519 %2 = call <4 x i64> @llvm.x86.avx512.mask.compress.v4i64(<4 x i64> %data, <4 x i64> zeroinitializer, <4 x i1> %extract)
523 define <4 x i64> @test_compress_q_256(<4 x i64> %data) {
524 ; CHECK-LABEL: test_compress_q_256:
526 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
527 %1 = call <4 x i64> @llvm.x86.avx512.mask.compress.v4i64(<4 x i64> %data, <4 x i64> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
531 define <8 x i32> @test_mask_compress_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) {
532 ; X86-LABEL: test_mask_compress_d_256:
534 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
535 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
536 ; X86-NEXT: vpcompressd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0xc1]
537 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
538 ; X86-NEXT: retl # encoding: [0xc3]
540 ; X64-LABEL: test_mask_compress_d_256:
542 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
543 ; X64-NEXT: vpcompressd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0xc1]
544 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
545 ; X64-NEXT: retq # encoding: [0xc3]
546 %1 = bitcast i8 %mask to <8 x i1>
547 %2 = call <8 x i32> @llvm.x86.avx512.mask.compress.v8i32(<8 x i32> %data, <8 x i32> %passthru, <8 x i1> %1)
551 define <8 x i32> @test_maskz_compress_d_256(<8 x i32> %data, i8 %mask) {
552 ; X86-LABEL: test_maskz_compress_d_256:
554 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
555 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
556 ; X86-NEXT: vpcompressd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8b,0xc0]
557 ; X86-NEXT: retl # encoding: [0xc3]
559 ; X64-LABEL: test_maskz_compress_d_256:
561 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
562 ; X64-NEXT: vpcompressd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8b,0xc0]
563 ; X64-NEXT: retq # encoding: [0xc3]
564 %1 = bitcast i8 %mask to <8 x i1>
565 %2 = call <8 x i32> @llvm.x86.avx512.mask.compress.v8i32(<8 x i32> %data, <8 x i32> zeroinitializer, <8 x i1> %1)
569 define <8 x i32> @test_compress_d_256(<8 x i32> %data) {
570 ; CHECK-LABEL: test_compress_d_256:
572 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
573 %1 = call <8 x i32> @llvm.x86.avx512.mask.compress.v8i32(<8 x i32> %data, <8 x i32> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
577 define <4 x double> @test_expand_pd_256(<4 x double> %data) {
578 ; CHECK-LABEL: test_expand_pd_256:
580 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
581 %1 = call <4 x double> @llvm.x86.avx512.mask.expand.v4f64(<4 x double> %data, <4 x double> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
585 define <4 x double> @test_mask_expand_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) {
586 ; X86-LABEL: test_mask_expand_pd_256:
588 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
589 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
590 ; X86-NEXT: vexpandpd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
591 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
592 ; X86-NEXT: retl # encoding: [0xc3]
594 ; X64-LABEL: test_mask_expand_pd_256:
596 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
597 ; X64-NEXT: vexpandpd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
598 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
599 ; X64-NEXT: retq # encoding: [0xc3]
600 %1 = bitcast i8 %mask to <8 x i1>
601 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
602 %2 = call <4 x double> @llvm.x86.avx512.mask.expand.v4f64(<4 x double> %data, <4 x double> %passthru, <4 x i1> %extract)
606 define <4 x double> @test_maskz_expand_pd_256(<4 x double> %data, i8 %mask) {
607 ; X86-LABEL: test_maskz_expand_pd_256:
609 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
610 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
611 ; X86-NEXT: vexpandpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0xc0]
612 ; X86-NEXT: retl # encoding: [0xc3]
614 ; X64-LABEL: test_maskz_expand_pd_256:
616 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
617 ; X64-NEXT: vexpandpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0xc0]
618 ; X64-NEXT: retq # encoding: [0xc3]
619 %1 = bitcast i8 %mask to <8 x i1>
620 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
621 %2 = call <4 x double> @llvm.x86.avx512.mask.expand.v4f64(<4 x double> %data, <4 x double> zeroinitializer, <4 x i1> %extract)
625 define <8 x float> @test_expand_ps_256(<8 x float> %data) {
626 ; CHECK-LABEL: test_expand_ps_256:
628 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
629 %1 = call <8 x float> @llvm.x86.avx512.mask.expand.v8f32(<8 x float> %data, <8 x float> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
633 define <8 x float> @test_mask_expand_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) {
634 ; X86-LABEL: test_mask_expand_ps_256:
636 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
637 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
638 ; X86-NEXT: vexpandps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0xc8]
639 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
640 ; X86-NEXT: retl # encoding: [0xc3]
642 ; X64-LABEL: test_mask_expand_ps_256:
644 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
645 ; X64-NEXT: vexpandps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0xc8]
646 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
647 ; X64-NEXT: retq # encoding: [0xc3]
648 %1 = bitcast i8 %mask to <8 x i1>
649 %2 = call <8 x float> @llvm.x86.avx512.mask.expand.v8f32(<8 x float> %data, <8 x float> %passthru, <8 x i1> %1)
653 define <8 x float> @test_maskz_expand_ps_256(<8 x float> %data, i8 %mask) {
654 ; X86-LABEL: test_maskz_expand_ps_256:
656 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
657 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
658 ; X86-NEXT: vexpandps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0xc0]
659 ; X86-NEXT: retl # encoding: [0xc3]
661 ; X64-LABEL: test_maskz_expand_ps_256:
663 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
664 ; X64-NEXT: vexpandps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0xc0]
665 ; X64-NEXT: retq # encoding: [0xc3]
666 %1 = bitcast i8 %mask to <8 x i1>
667 %2 = call <8 x float> @llvm.x86.avx512.mask.expand.v8f32(<8 x float> %data, <8 x float> zeroinitializer, <8 x i1> %1)
671 define <4 x i64> @test_expand_q_256(<4 x i64> %data) {
672 ; CHECK-LABEL: test_expand_q_256:
674 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
675 %1 = call <4 x i64> @llvm.x86.avx512.mask.expand.v4i64(<4 x i64> %data, <4 x i64> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
679 define <4 x i64> @test_mask_expand_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) {
680 ; X86-LABEL: test_mask_expand_q_256:
682 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
683 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
684 ; X86-NEXT: vpexpandq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0xc8]
685 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
686 ; X86-NEXT: retl # encoding: [0xc3]
688 ; X64-LABEL: test_mask_expand_q_256:
690 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
691 ; X64-NEXT: vpexpandq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0xc8]
692 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
693 ; X64-NEXT: retq # encoding: [0xc3]
694 %1 = bitcast i8 %mask to <8 x i1>
695 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
696 %2 = call <4 x i64> @llvm.x86.avx512.mask.expand.v4i64(<4 x i64> %data, <4 x i64> %passthru, <4 x i1> %extract)
700 define <4 x i64> @test_maskz_expand_q_256(<4 x i64> %data, i8 %mask) {
701 ; X86-LABEL: test_maskz_expand_q_256:
703 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
704 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
705 ; X86-NEXT: vpexpandq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0xc0]
706 ; X86-NEXT: retl # encoding: [0xc3]
708 ; X64-LABEL: test_maskz_expand_q_256:
710 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
711 ; X64-NEXT: vpexpandq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0xc0]
712 ; X64-NEXT: retq # encoding: [0xc3]
713 %1 = bitcast i8 %mask to <8 x i1>
714 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
715 %2 = call <4 x i64> @llvm.x86.avx512.mask.expand.v4i64(<4 x i64> %data, <4 x i64> zeroinitializer, <4 x i1> %extract)
719 define <8 x i32> @test_expand_d_256(<8 x i32> %data) {
720 ; CHECK-LABEL: test_expand_d_256:
722 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
723 %1 = call <8 x i32> @llvm.x86.avx512.mask.expand.v8i32(<8 x i32> %data, <8 x i32> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
727 define <8 x i32> @test_mask_expand_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) {
728 ; X86-LABEL: test_mask_expand_d_256:
730 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
731 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
732 ; X86-NEXT: vpexpandd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0xc8]
733 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
734 ; X86-NEXT: retl # encoding: [0xc3]
736 ; X64-LABEL: test_mask_expand_d_256:
738 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
739 ; X64-NEXT: vpexpandd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0xc8]
740 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
741 ; X64-NEXT: retq # encoding: [0xc3]
742 %1 = bitcast i8 %mask to <8 x i1>
743 %2 = call <8 x i32> @llvm.x86.avx512.mask.expand.v8i32(<8 x i32> %data, <8 x i32> %passthru, <8 x i1> %1)
747 define <8 x i32> @test_maskz_expand_d_256(<8 x i32> %data, i8 %mask) {
748 ; X86-LABEL: test_maskz_expand_d_256:
750 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
751 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
752 ; X86-NEXT: vpexpandd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0xc0]
753 ; X86-NEXT: retl # encoding: [0xc3]
755 ; X64-LABEL: test_maskz_expand_d_256:
757 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
758 ; X64-NEXT: vpexpandd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0xc0]
759 ; X64-NEXT: retq # encoding: [0xc3]
760 %1 = bitcast i8 %mask to <8 x i1>
761 %2 = call <8 x i32> @llvm.x86.avx512.mask.expand.v8i32(<8 x i32> %data, <8 x i32> zeroinitializer, <8 x i1> %1)
765 define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) {
766 ; CHECK-LABEL: test_cmpps_256:
768 ; CHECK-NEXT: vcmpleps %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x02]
769 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
770 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
771 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
772 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
773 %res = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
774 %1 = bitcast <8 x i1> %res to i8
777 declare <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32, <8 x i1>)
779 define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) {
780 ; CHECK-LABEL: test_cmpps_128:
782 ; CHECK-NEXT: vcmpleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02]
783 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
784 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
785 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
786 %res = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
787 %1 = shufflevector <4 x i1> %res, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
788 %2 = bitcast <8 x i1> %1 to i8
791 declare <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128(<4 x float>, <4 x float>, i32, <4 x i1>)
793 define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) {
794 ; CHECK-LABEL: test_cmppd_256:
796 ; CHECK-NEXT: vcmplepd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc1,0x02]
797 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
798 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
799 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
800 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
801 %res = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
802 %1 = shufflevector <4 x i1> %res, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
803 %2 = bitcast <8 x i1> %1 to i8
806 declare <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256(<4 x double>, <4 x double>, i32, <4 x i1>)
808 define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) {
809 ; CHECK-LABEL: test_cmppd_128:
811 ; CHECK-NEXT: vcmplepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02]
812 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
813 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
814 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
815 %res = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2, <2 x i1> <i1 true, i1 true>)
816 %1 = shufflevector <2 x i1> %res, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
817 %2 = bitcast <8 x i1> %1 to i8
820 declare <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double>, <2 x double>, i32, <2 x i1>)
822 define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
823 ; X86-LABEL: test_mm512_maskz_max_ps_256:
825 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
826 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
827 ; X86-NEXT: vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5f,0xc1]
828 ; X86-NEXT: retl # encoding: [0xc3]
830 ; X64-LABEL: test_mm512_maskz_max_ps_256:
832 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
833 ; X64-NEXT: vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5f,0xc1]
834 ; X64-NEXT: retq # encoding: [0xc3]
835 %1 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
836 %2 = bitcast i8 %mask to <8 x i1>
837 %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> zeroinitializer
841 define <8 x float> @test_mm512_mask_max_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
842 ; X86-LABEL: test_mm512_mask_max_ps_256:
844 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
845 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
846 ; X86-NEXT: vmaxps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1]
847 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
848 ; X86-NEXT: retl # encoding: [0xc3]
850 ; X64-LABEL: test_mm512_mask_max_ps_256:
852 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
853 ; X64-NEXT: vmaxps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1]
854 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
855 ; X64-NEXT: retq # encoding: [0xc3]
856 %1 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
857 %2 = bitcast i8 %mask to <8 x i1>
858 %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %src
862 define <8 x float> @test_mm512_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
863 ; CHECK-LABEL: test_mm512_max_ps_256:
865 ; CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5f,0xc1]
866 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
867 %1 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
870 declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>)
872 define <4 x float> @test_mm512_maskz_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
873 ; X86-LABEL: test_mm512_maskz_max_ps_128:
875 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
876 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
877 ; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5f,0xc1]
878 ; X86-NEXT: retl # encoding: [0xc3]
880 ; X64-LABEL: test_mm512_maskz_max_ps_128:
882 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
883 ; X64-NEXT: vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5f,0xc1]
884 ; X64-NEXT: retq # encoding: [0xc3]
885 %1 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
886 %2 = bitcast i8 %mask to <8 x i1>
887 %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
888 %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> zeroinitializer
892 define <4 x float> @test_mm512_mask_max_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
893 ; X86-LABEL: test_mm512_mask_max_ps_128:
895 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
896 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
897 ; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1]
898 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
899 ; X86-NEXT: retl # encoding: [0xc3]
901 ; X64-LABEL: test_mm512_mask_max_ps_128:
903 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
904 ; X64-NEXT: vmaxps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1]
905 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
906 ; X64-NEXT: retq # encoding: [0xc3]
907 %1 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
908 %2 = bitcast i8 %mask to <8 x i1>
909 %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
910 %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %src
914 define <4 x float> @test_mm512_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
915 ; CHECK-LABEL: test_mm512_max_ps_128:
917 ; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5f,0xc1]
918 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
919 %1 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
922 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>)
924 define <8 x float> @test_mm512_maskz_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
925 ; X86-LABEL: test_mm512_maskz_min_ps_256:
927 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
928 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
929 ; X86-NEXT: vminps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5d,0xc1]
930 ; X86-NEXT: retl # encoding: [0xc3]
932 ; X64-LABEL: test_mm512_maskz_min_ps_256:
934 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
935 ; X64-NEXT: vminps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5d,0xc1]
936 ; X64-NEXT: retq # encoding: [0xc3]
937 %1 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1)
938 %2 = bitcast i8 %mask to <8 x i1>
939 %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> zeroinitializer
943 define <8 x float> @test_mm512_mask_min_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
944 ; X86-LABEL: test_mm512_mask_min_ps_256:
946 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
947 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
948 ; X86-NEXT: vminps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5d,0xd1]
949 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
950 ; X86-NEXT: retl # encoding: [0xc3]
952 ; X64-LABEL: test_mm512_mask_min_ps_256:
954 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
955 ; X64-NEXT: vminps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5d,0xd1]
956 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
957 ; X64-NEXT: retq # encoding: [0xc3]
958 %1 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1)
959 %2 = bitcast i8 %mask to <8 x i1>
960 %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %src
964 define <8 x float> @test_mm512_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
965 ; CHECK-LABEL: test_mm512_min_ps_256:
967 ; CHECK-NEXT: vminps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5d,0xc1]
968 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
969 %1 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1)
972 declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>)
974 define <4 x float> @test_mm512_maskz_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
975 ; X86-LABEL: test_mm512_maskz_min_ps_128:
977 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
978 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
979 ; X86-NEXT: vminps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5d,0xc1]
980 ; X86-NEXT: retl # encoding: [0xc3]
982 ; X64-LABEL: test_mm512_maskz_min_ps_128:
984 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
985 ; X64-NEXT: vminps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5d,0xc1]
986 ; X64-NEXT: retq # encoding: [0xc3]
987 %1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
988 %2 = bitcast i8 %mask to <8 x i1>
989 %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
990 %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> zeroinitializer
994 define <4 x float> @test_mm512_mask_min_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
995 ; X86-LABEL: test_mm512_mask_min_ps_128:
997 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
998 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
999 ; X86-NEXT: vminps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5d,0xd1]
1000 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
1001 ; X86-NEXT: retl # encoding: [0xc3]
1003 ; X64-LABEL: test_mm512_mask_min_ps_128:
1005 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1006 ; X64-NEXT: vminps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5d,0xd1]
1007 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
1008 ; X64-NEXT: retq # encoding: [0xc3]
1009 %1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
1010 %2 = bitcast i8 %mask to <8 x i1>
1011 %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1012 %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %src
1016 define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
1017 ; CHECK-LABEL: test_mm512_min_ps_128:
1019 ; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1]
1020 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1021 %1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
1024 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>)
1026 define <4 x double> @test_getexp_pd_256(<4 x double> %a0) {
1027 ; CHECK-LABEL: test_getexp_pd_256:
1029 ; CHECK-NEXT: vgetexppd %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x42,0xc0]
1030 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1031 %res = call <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 -1)
1032 ret <4 x double> %res
1035 declare <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
1037 define <8 x float> @test_getexp_ps_256(<8 x float> %a0) {
1038 ; CHECK-LABEL: test_getexp_ps_256:
1040 ; CHECK-NEXT: vgetexpps %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x42,0xc0]
1041 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1042 %res = call <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1)
1043 ret <8 x float> %res
1045 declare <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
1047 declare <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>)
1049 define <4 x i32>@test_int_x86_avx512_vpermi2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
1050 ; CHECK-LABEL: test_int_x86_avx512_vpermi2var_d_128:
1052 ; CHECK-NEXT: vpermt2d %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x7e,0xc2]
1053 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1054 %1 = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
1058 define <4 x i32>@test_int_x86_avx512_mask_vpermi2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
1059 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_d_128:
1061 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1062 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1063 ; X86-NEXT: vpermi2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x76,0xca]
1064 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1065 ; X86-NEXT: retl # encoding: [0xc3]
1067 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_d_128:
1069 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1070 ; X64-NEXT: vpermi2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x76,0xca]
1071 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1072 ; X64-NEXT: retq # encoding: [0xc3]
1073 %1 = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
1074 %2 = bitcast i8 %x3 to <8 x i1>
1075 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1076 %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x1
1080 define <4 x i32>@test_int_x86_avx512_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
1081 ; CHECK-LABEL: test_int_x86_avx512_vpermt2var_d_128:
1083 ; CHECK-NEXT: vpermi2d %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x76,0xc2]
1084 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1085 %1 = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x2)
1089 define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
1090 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128:
1092 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1093 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1094 ; X86-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xca]
1095 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1096 ; X86-NEXT: retl # encoding: [0xc3]
1098 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128:
1100 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1101 ; X64-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xca]
1102 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1103 ; X64-NEXT: retq # encoding: [0xc3]
1104 %1 = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x2)
1105 %2 = bitcast i8 %x3 to <8 x i1>
1106 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1107 %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x1
1111 define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
1112 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128:
1114 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1115 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1116 ; X86-NEXT: vpermi2d %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x76,0xc2]
1117 ; X86-NEXT: retl # encoding: [0xc3]
1119 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128:
1121 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1122 ; X64-NEXT: vpermi2d %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x76,0xc2]
1123 ; X64-NEXT: retq # encoding: [0xc3]
1124 %1 = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x2)
1125 %2 = bitcast i8 %x3 to <8 x i1>
1126 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1127 %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> zeroinitializer
1131 declare <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>)
1133 define <8 x i32>@test_int_x86_avx512_vpermi2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
1134 ; CHECK-LABEL: test_int_x86_avx512_vpermi2var_d_256:
1136 ; CHECK-NEXT: vpermt2d %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x7e,0xc2]
1137 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1138 %1 = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
1142 define <8 x i32>@test_int_x86_avx512_mask_vpermi2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
1143 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_d_256:
1145 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1146 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1147 ; X86-NEXT: vpermi2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x76,0xca]
1148 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1149 ; X86-NEXT: retl # encoding: [0xc3]
1151 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_d_256:
1153 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1154 ; X64-NEXT: vpermi2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x76,0xca]
1155 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1156 ; X64-NEXT: retq # encoding: [0xc3]
1157 %1 = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
1158 %2 = bitcast i8 %x3 to <8 x i1>
1159 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x1
1163 define <8 x i32>@test_int_x86_avx512_ask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
1164 ; CHECK-LABEL: test_int_x86_avx512_ask_vpermt2var_d_256:
1166 ; CHECK-NEXT: vpermi2d %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x76,0xc2]
1167 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1168 %1 = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x2)
1172 define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
1173 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256:
1175 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1176 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1177 ; X86-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xca]
1178 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1179 ; X86-NEXT: retl # encoding: [0xc3]
1181 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256:
1183 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1184 ; X64-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xca]
1185 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1186 ; X64-NEXT: retq # encoding: [0xc3]
1187 %1 = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x2)
1188 %2 = bitcast i8 %x3 to <8 x i1>
1189 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x1
1193 define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
1194 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256:
1196 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1197 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1198 ; X86-NEXT: vpermi2d %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x76,0xc2]
1199 ; X86-NEXT: retl # encoding: [0xc3]
1201 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256:
1203 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1204 ; X64-NEXT: vpermi2d %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x76,0xc2]
1205 ; X64-NEXT: retq # encoding: [0xc3]
1206 %1 = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x2)
1207 %2 = bitcast i8 %x3 to <8 x i1>
1208 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer
1212 declare <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>)
1214 define <2 x double>@test_int_x86_avx512_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2) {
1215 ; CHECK-LABEL: test_int_x86_avx512_vpermi2var_pd_128:
1217 ; CHECK-NEXT: vpermt2pd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x7f,0xc2]
1218 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1219 %1 = call <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2)
1223 define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
1224 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
1226 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1227 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1228 ; X86-NEXT: vpermi2pd %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x77,0xca]
1229 ; X86-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
1230 ; X86-NEXT: retl # encoding: [0xc3]
1232 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
1234 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1235 ; X64-NEXT: vpermi2pd %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x77,0xca]
1236 ; X64-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
1237 ; X64-NEXT: retq # encoding: [0xc3]
1238 %1 = call <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2)
1239 %2 = bitcast <2 x i64> %x1 to <2 x double>
1240 %3 = bitcast i8 %x3 to <8 x i1>
1241 %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <2 x i32> <i32 0, i32 1>
1242 %4 = select <2 x i1> %extract, <2 x double> %1, <2 x double> %2
1246 declare <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>)
1248 define <4 x double>@test_int_x86_avx512_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2) {
1249 ; CHECK-LABEL: test_int_x86_avx512_vpermi2var_pd_256:
1251 ; CHECK-NEXT: vpermt2pd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x7f,0xc2]
1252 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1253 %1 = call <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2)
1257 define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
1258 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
1260 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1261 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1262 ; X86-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x77,0xca]
1263 ; X86-NEXT: vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
1264 ; X86-NEXT: retl # encoding: [0xc3]
1266 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
1268 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1269 ; X64-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x77,0xca]
1270 ; X64-NEXT: vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
1271 ; X64-NEXT: retq # encoding: [0xc3]
1272 %1 = call <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2)
1273 %2 = bitcast <4 x i64> %x1 to <4 x double>
1274 %3 = bitcast i8 %x3 to <8 x i1>
1275 %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1276 %4 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %2
1280 declare <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>)
1282 define <4 x float>@test_int_x86_avx512_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2) {
1283 ; CHECK-LABEL: test_int_x86_avx512_vpermi2var_ps_128:
1285 ; CHECK-NEXT: vpermt2ps %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x7f,0xc2]
1286 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1287 %1 = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2)
1291 define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
1292 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
1294 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1295 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1296 ; X86-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
1297 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1298 ; X86-NEXT: retl # encoding: [0xc3]
1300 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
1302 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1303 ; X64-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
1304 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1305 ; X64-NEXT: retq # encoding: [0xc3]
1306 %1 = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2)
1307 %2 = bitcast <4 x i32> %x1 to <4 x float>
1308 %3 = bitcast i8 %x3 to <8 x i1>
1309 %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1310 %4 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %2
1314 define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128_cast(<4 x float> %x0, <2 x i64> %x1, <4 x float> %x2, i8 %x3) {
1315 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128_cast:
1317 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1318 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1319 ; X86-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
1320 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1321 ; X86-NEXT: retl # encoding: [0xc3]
1323 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128_cast:
1325 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1326 ; X64-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
1327 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1328 ; X64-NEXT: retq # encoding: [0xc3]
1329 %x1cast = bitcast <2 x i64> %x1 to <4 x i32>
1330 %1 = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1cast, <4 x float> %x2)
1331 %2 = bitcast <4 x i32> %x1cast to <4 x float>
1332 %3 = bitcast i8 %x3 to <8 x i1>
1333 %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1334 %4 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %2
1338 declare <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>)
1340 define <8 x float>@test_int_x86_avx512_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2) {
1341 ; CHECK-LABEL: test_int_x86_avx512_vpermi2var_ps_256:
1343 ; CHECK-NEXT: vpermt2ps %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x7f,0xc2]
1344 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1345 %1 = call <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2)
1349 define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
1350 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
1352 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1353 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1354 ; X86-NEXT: vpermi2ps %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x77,0xca]
1355 ; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
1356 ; X86-NEXT: retl # encoding: [0xc3]
1358 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
1360 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1361 ; X64-NEXT: vpermi2ps %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x77,0xca]
1362 ; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
1363 ; X64-NEXT: retq # encoding: [0xc3]
1364 %1 = call <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2)
1365 %2 = bitcast <8 x i32> %x1 to <8 x float>
1366 %3 = bitcast i8 %x3 to <8 x i1>
1367 %4 = select <8 x i1> %3, <8 x float> %1, <8 x float> %2
1371 declare <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>)
1373 define <2 x i64>@test_int_x86_avx512_vpermi2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
1374 ; CHECK-LABEL: test_int_x86_avx512_vpermi2var_q_128:
1376 ; CHECK-NEXT: vpermt2q %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x7e,0xc2]
1377 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1378 %1 = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
1382 define <2 x i64>@test_int_x86_avx512_mask_vpermi2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
1383 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_q_128:
1385 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1386 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1387 ; X86-NEXT: vpermi2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x76,0xca]
1388 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1389 ; X86-NEXT: retl # encoding: [0xc3]
1391 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_q_128:
1393 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1394 ; X64-NEXT: vpermi2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x76,0xca]
1395 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1396 ; X64-NEXT: retq # encoding: [0xc3]
1397 %1 = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
1398 %2 = bitcast i8 %x3 to <8 x i1>
1399 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
1400 %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x1
1404 define <2 x i64>@test_int_x86_avx512_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
1405 ; CHECK-LABEL: test_int_x86_avx512_vpermt2var_q_128:
1407 ; CHECK-NEXT: vpermi2q %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x76,0xc2]
1408 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1409 %1 = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x2)
1413 define <2 x i64>@test_int_x86_avx512_mask_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
1414 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_q_128:
1416 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1417 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1418 ; X86-NEXT: vpermt2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7e,0xca]
1419 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1420 ; X86-NEXT: retl # encoding: [0xc3]
1422 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_q_128:
1424 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1425 ; X64-NEXT: vpermt2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7e,0xca]
1426 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1427 ; X64-NEXT: retq # encoding: [0xc3]
1428 %1 = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x2)
1429 %2 = bitcast i8 %x3 to <8 x i1>
1430 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
1431 %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x1
1435 define <2 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
1436 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_128:
1438 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1439 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1440 ; X86-NEXT: vpermi2q %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x76,0xc2]
1441 ; X86-NEXT: retl # encoding: [0xc3]
1443 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_128:
1445 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1446 ; X64-NEXT: vpermi2q %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x76,0xc2]
1447 ; X64-NEXT: retq # encoding: [0xc3]
1448 %1 = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x2)
1449 %2 = bitcast i8 %x3 to <8 x i1>
1450 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
1451 %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> zeroinitializer
1455 declare <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>)
1457 define <4 x i64>@test_int_x86_avx512_vpermi2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
1458 ; CHECK-LABEL: test_int_x86_avx512_vpermi2var_q_256:
1460 ; CHECK-NEXT: vpermt2q %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x7e,0xc2]
1461 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1462 %1 = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
1466 define <4 x i64>@test_int_x86_avx512_mask_vpermi2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
1467 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_q_256:
1469 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1470 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1471 ; X86-NEXT: vpermi2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x76,0xca]
1472 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1473 ; X86-NEXT: retl # encoding: [0xc3]
1475 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_q_256:
1477 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1478 ; X64-NEXT: vpermi2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x76,0xca]
1479 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1480 ; X64-NEXT: retq # encoding: [0xc3]
1481 %1 = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
1482 %2 = bitcast i8 %x3 to <8 x i1>
1483 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1484 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x1
1488 define <4 x i64>@test_int_x86_avx512_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
1489 ; CHECK-LABEL: test_int_x86_avx512_vpermt2var_q_256:
1491 ; CHECK-NEXT: vpermi2q %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x76,0xc2]
1492 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1493 %1 = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x2)
1497 define <4 x i64>@test_int_x86_avx512_mask_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
1498 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_q_256:
1500 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1501 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1502 ; X86-NEXT: vpermt2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7e,0xca]
1503 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1504 ; X86-NEXT: retl # encoding: [0xc3]
1506 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_q_256:
1508 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1509 ; X64-NEXT: vpermt2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7e,0xca]
1510 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1511 ; X64-NEXT: retq # encoding: [0xc3]
1512 %1 = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x2)
1513 %2 = bitcast i8 %x3 to <8 x i1>
1514 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1515 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x1
1519 define <4 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
1520 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_256:
1522 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1523 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1524 ; X86-NEXT: vpermi2q %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x76,0xc2]
1525 ; X86-NEXT: retl # encoding: [0xc3]
1527 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_256:
1529 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1530 ; X64-NEXT: vpermi2q %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x76,0xc2]
1531 ; X64-NEXT: retq # encoding: [0xc3]
1532 %1 = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x2)
1533 %2 = bitcast i8 %x3 to <8 x i1>
1534 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1535 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> zeroinitializer
1539 declare <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
1541 define <2 x double>@test_int_x86_avx512_scalef_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2) {
1542 ; CHECK-LABEL: test_int_x86_avx512_scalef_pd_128:
1544 ; CHECK-NEXT: vscalefpd %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x2c,0xc1]
1545 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1546 %res = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
1547 ret <2 x double> %res
1550 define <2 x double>@test_int_x86_avx512_mask_scalef_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
1551 ; X86-LABEL: test_int_x86_avx512_mask_scalef_pd_128:
1553 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1554 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1555 ; X86-NEXT: vscalefpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x2c,0xd1]
1556 ; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
1557 ; X86-NEXT: retl # encoding: [0xc3]
1559 ; X64-LABEL: test_int_x86_avx512_mask_scalef_pd_128:
1561 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1562 ; X64-NEXT: vscalefpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x2c,0xd1]
1563 ; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
1564 ; X64-NEXT: retq # encoding: [0xc3]
1565 %res = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
1566 ret <2 x double> %res
1569 declare <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
1571 define <4 x double>@test_int_x86_avx512_scalef_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2) {
1572 ; CHECK-LABEL: test_int_x86_avx512_scalef_pd_256:
1574 ; CHECK-NEXT: vscalefpd %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x2c,0xc1]
1575 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1576 %res = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
1577 ret <4 x double> %res
1580 define <4 x double>@test_int_x86_avx512_mask_scalef_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
1581 ; X86-LABEL: test_int_x86_avx512_mask_scalef_pd_256:
1583 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1584 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1585 ; X86-NEXT: vscalefpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x2c,0xd1]
1586 ; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
1587 ; X86-NEXT: retl # encoding: [0xc3]
1589 ; X64-LABEL: test_int_x86_avx512_mask_scalef_pd_256:
1591 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1592 ; X64-NEXT: vscalefpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x2c,0xd1]
1593 ; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
1594 ; X64-NEXT: retq # encoding: [0xc3]
1595 %res = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
1596 ret <4 x double> %res
1599 declare <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
1601 define <4 x float>@test_int_x86_avx512_scalef_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2) {
1602 ; CHECK-LABEL: test_int_x86_avx512_scalef_ps_128:
1604 ; CHECK-NEXT: vscalefps %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x2c,0xc1]
1605 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1606 %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
1607 ret <4 x float> %res
1610 define <4 x float>@test_int_x86_avx512_mask_scalef_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
1611 ; X86-LABEL: test_int_x86_avx512_mask_scalef_ps_128:
1613 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1614 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1615 ; X86-NEXT: vscalefps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2c,0xd1]
1616 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
1617 ; X86-NEXT: retl # encoding: [0xc3]
1619 ; X64-LABEL: test_int_x86_avx512_mask_scalef_ps_128:
1621 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1622 ; X64-NEXT: vscalefps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2c,0xd1]
1623 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
1624 ; X64-NEXT: retq # encoding: [0xc3]
1625 %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
1626 ret <4 x float> %res
1629 declare <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
1631 define <8 x float>@test_int_x86_avx512_scalef_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2) {
1632 ; CHECK-LABEL: test_int_x86_avx512_scalef_ps_256:
1634 ; CHECK-NEXT: vscalefps %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x2c,0xc1]
1635 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1636 %res = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
1637 ret <8 x float> %res
1640 define <8 x float>@test_int_x86_avx512_mask_scalef_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
1641 ; X86-LABEL: test_int_x86_avx512_mask_scalef_ps_256:
1643 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1644 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1645 ; X86-NEXT: vscalefps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2c,0xd1]
1646 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
1647 ; X86-NEXT: retl # encoding: [0xc3]
1649 ; X64-LABEL: test_int_x86_avx512_mask_scalef_ps_256:
1651 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1652 ; X64-NEXT: vscalefps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2c,0xd1]
1653 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
1654 ; X64-NEXT: retq # encoding: [0xc3]
1655 %res = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
1656 ret <8 x float> %res
1659 declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64>, <16 x i8>, i8)
1661 define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
1662 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qb_128:
1664 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1665 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1666 ; X86-NEXT: vpmovqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x32,0xc2]
1667 ; X86-NEXT: vpmovqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x32,0xc1]
1668 ; X86-NEXT: vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
1669 ; X86-NEXT: vpmovqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x32,0xc0]
1670 ; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1671 ; X86-NEXT: retl # encoding: [0xc3]
1673 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qb_128:
1675 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1676 ; X64-NEXT: vpmovqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x32,0xc2]
1677 ; X64-NEXT: vpmovqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x32,0xc1]
1678 ; X64-NEXT: vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
1679 ; X64-NEXT: vpmovqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x32,0xc0]
1680 ; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1681 ; X64-NEXT: retq # encoding: [0xc3]
1682 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
1683 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
1684 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
1685 %res3 = add <16 x i8> %res0, %res1
1686 %res4 = add <16 x i8> %res3, %res2
1690 declare void @llvm.x86.avx512.mask.pmov.qb.mem.128(ptr %ptr, <2 x i64>, i8)
1692 define void @test_int_x86_avx512_mask_pmov_qb_mem_128(ptr %ptr, <2 x i64> %x1, i8 %x2) {
1693 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_128:
1695 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1696 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1697 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1698 ; X86-NEXT: vpmovqb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x32,0x00]
1699 ; X86-NEXT: vpmovqb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x32,0x00]
1700 ; X86-NEXT: retl # encoding: [0xc3]
1702 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_128:
1704 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1705 ; X64-NEXT: vpmovqb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x32,0x07]
1706 ; X64-NEXT: vpmovqb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x32,0x07]
1707 ; X64-NEXT: retq # encoding: [0xc3]
1708 call void @llvm.x86.avx512.mask.pmov.qb.mem.128(ptr %ptr, <2 x i64> %x1, i8 -1)
1709 call void @llvm.x86.avx512.mask.pmov.qb.mem.128(ptr %ptr, <2 x i64> %x1, i8 %x2)
1713 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64>, <16 x i8>, i8)
1715 define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
1716 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qb_128:
1718 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1719 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1720 ; X86-NEXT: vpmovsqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x22,0xc2]
1721 ; X86-NEXT: vpmovsqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x22,0xc1]
1722 ; X86-NEXT: vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
1723 ; X86-NEXT: vpmovsqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x22,0xc0]
1724 ; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1725 ; X86-NEXT: retl # encoding: [0xc3]
1727 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qb_128:
1729 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1730 ; X64-NEXT: vpmovsqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x22,0xc2]
1731 ; X64-NEXT: vpmovsqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x22,0xc1]
1732 ; X64-NEXT: vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
1733 ; X64-NEXT: vpmovsqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x22,0xc0]
1734 ; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1735 ; X64-NEXT: retq # encoding: [0xc3]
1736 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
1737 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
1738 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
1739 %res3 = add <16 x i8> %res0, %res1
1740 %res4 = add <16 x i8> %res3, %res2
1744 declare void @llvm.x86.avx512.mask.pmovs.qb.mem.128(ptr %ptr, <2 x i64>, i8)
1746 define void @test_int_x86_avx512_mask_pmovs_qb_mem_128(ptr %ptr, <2 x i64> %x1, i8 %x2) {
1747 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_128:
1749 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1750 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1751 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1752 ; X86-NEXT: vpmovsqb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x22,0x00]
1753 ; X86-NEXT: vpmovsqb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x22,0x00]
1754 ; X86-NEXT: retl # encoding: [0xc3]
1756 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_128:
1758 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1759 ; X64-NEXT: vpmovsqb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x22,0x07]
1760 ; X64-NEXT: vpmovsqb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x22,0x07]
1761 ; X64-NEXT: retq # encoding: [0xc3]
1762 call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(ptr %ptr, <2 x i64> %x1, i8 -1)
1763 call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(ptr %ptr, <2 x i64> %x1, i8 %x2)
1767 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64>, <16 x i8>, i8)
1769 define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
1770 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qb_128:
1772 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1773 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1774 ; X86-NEXT: vpmovusqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x12,0xc2]
1775 ; X86-NEXT: vpmovusqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x12,0xc1]
1776 ; X86-NEXT: vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
1777 ; X86-NEXT: vpmovusqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x12,0xc0]
1778 ; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1779 ; X86-NEXT: retl # encoding: [0xc3]
1781 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qb_128:
1783 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1784 ; X64-NEXT: vpmovusqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x12,0xc2]
1785 ; X64-NEXT: vpmovusqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x12,0xc1]
1786 ; X64-NEXT: vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
1787 ; X64-NEXT: vpmovusqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x12,0xc0]
1788 ; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1789 ; X64-NEXT: retq # encoding: [0xc3]
1790 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
1791 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
1792 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
1793 %res3 = add <16 x i8> %res0, %res1
1794 %res4 = add <16 x i8> %res3, %res2
1798 declare void @llvm.x86.avx512.mask.pmovus.qb.mem.128(ptr %ptr, <2 x i64>, i8)
1800 define void @test_int_x86_avx512_mask_pmovus_qb_mem_128(ptr %ptr, <2 x i64> %x1, i8 %x2) {
1801 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_128:
1803 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1804 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1805 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1806 ; X86-NEXT: vpmovusqb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x12,0x00]
1807 ; X86-NEXT: vpmovusqb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x12,0x00]
1808 ; X86-NEXT: retl # encoding: [0xc3]
1810 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_128:
1812 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1813 ; X64-NEXT: vpmovusqb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x12,0x07]
1814 ; X64-NEXT: vpmovusqb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x12,0x07]
1815 ; X64-NEXT: retq # encoding: [0xc3]
1816 call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(ptr %ptr, <2 x i64> %x1, i8 -1)
1817 call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(ptr %ptr, <2 x i64> %x1, i8 %x2)
1821 declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64>, <16 x i8>, i8)
1823 define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
1824 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qb_256:
1826 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1827 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1828 ; X86-NEXT: vpmovqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x32,0xc2]
1829 ; X86-NEXT: vpmovqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x32,0xc1]
1830 ; X86-NEXT: vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
1831 ; X86-NEXT: vpmovqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x32,0xc0]
1832 ; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1833 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1834 ; X86-NEXT: retl # encoding: [0xc3]
1836 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qb_256:
1838 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1839 ; X64-NEXT: vpmovqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x32,0xc2]
1840 ; X64-NEXT: vpmovqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x32,0xc1]
1841 ; X64-NEXT: vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
1842 ; X64-NEXT: vpmovqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x32,0xc0]
1843 ; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1844 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1845 ; X64-NEXT: retq # encoding: [0xc3]
1846 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
1847 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2)
1848 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
1849 %res3 = add <16 x i8> %res0, %res1
1850 %res4 = add <16 x i8> %res3, %res2
1854 declare void @llvm.x86.avx512.mask.pmov.qb.mem.256(ptr %ptr, <4 x i64>, i8)
1856 define void @test_int_x86_avx512_mask_pmov_qb_mem_256(ptr %ptr, <4 x i64> %x1, i8 %x2) {
1857 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_256:
1859 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1860 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1861 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1862 ; X86-NEXT: vpmovqb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x32,0x00]
1863 ; X86-NEXT: vpmovqb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x32,0x00]
1864 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1865 ; X86-NEXT: retl # encoding: [0xc3]
1867 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_256:
1869 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1870 ; X64-NEXT: vpmovqb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x32,0x07]
1871 ; X64-NEXT: vpmovqb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x32,0x07]
1872 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1873 ; X64-NEXT: retq # encoding: [0xc3]
1874 call void @llvm.x86.avx512.mask.pmov.qb.mem.256(ptr %ptr, <4 x i64> %x1, i8 -1)
1875 call void @llvm.x86.avx512.mask.pmov.qb.mem.256(ptr %ptr, <4 x i64> %x1, i8 %x2)
1879 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64>, <16 x i8>, i8)
1881 define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
1882 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qb_256:
1884 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1885 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1886 ; X86-NEXT: vpmovsqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x22,0xc2]
1887 ; X86-NEXT: vpmovsqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x22,0xc1]
1888 ; X86-NEXT: vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
1889 ; X86-NEXT: vpmovsqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x22,0xc0]
1890 ; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1891 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1892 ; X86-NEXT: retl # encoding: [0xc3]
1894 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qb_256:
1896 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1897 ; X64-NEXT: vpmovsqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x22,0xc2]
1898 ; X64-NEXT: vpmovsqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x22,0xc1]
1899 ; X64-NEXT: vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
1900 ; X64-NEXT: vpmovsqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x22,0xc0]
1901 ; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1902 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1903 ; X64-NEXT: retq # encoding: [0xc3]
1904 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
1905 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2)
1906 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
1907 %res3 = add <16 x i8> %res0, %res1
1908 %res4 = add <16 x i8> %res3, %res2
1912 declare void @llvm.x86.avx512.mask.pmovs.qb.mem.256(ptr %ptr, <4 x i64>, i8)
1914 define void @test_int_x86_avx512_mask_pmovs_qb_mem_256(ptr %ptr, <4 x i64> %x1, i8 %x2) {
1915 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_256:
1917 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1918 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1919 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1920 ; X86-NEXT: vpmovsqb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x22,0x00]
1921 ; X86-NEXT: vpmovsqb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x22,0x00]
1922 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1923 ; X86-NEXT: retl # encoding: [0xc3]
1925 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_256:
1927 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1928 ; X64-NEXT: vpmovsqb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x22,0x07]
1929 ; X64-NEXT: vpmovsqb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x22,0x07]
1930 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1931 ; X64-NEXT: retq # encoding: [0xc3]
1932 call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(ptr %ptr, <4 x i64> %x1, i8 -1)
1933 call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(ptr %ptr, <4 x i64> %x1, i8 %x2)
1937 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64>, <16 x i8>, i8)
1939 define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
1940 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qb_256:
1942 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1943 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1944 ; X86-NEXT: vpmovusqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x12,0xc2]
1945 ; X86-NEXT: vpmovusqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x12,0xc1]
1946 ; X86-NEXT: vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
1947 ; X86-NEXT: vpmovusqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x12,0xc0]
1948 ; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1949 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1950 ; X86-NEXT: retl # encoding: [0xc3]
1952 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qb_256:
1954 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1955 ; X64-NEXT: vpmovusqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x12,0xc2]
1956 ; X64-NEXT: vpmovusqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x12,0xc1]
1957 ; X64-NEXT: vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
1958 ; X64-NEXT: vpmovusqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x12,0xc0]
1959 ; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
1960 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1961 ; X64-NEXT: retq # encoding: [0xc3]
1962 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
1963 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2)
1964 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
1965 %res3 = add <16 x i8> %res0, %res1
1966 %res4 = add <16 x i8> %res3, %res2
1970 declare void @llvm.x86.avx512.mask.pmovus.qb.mem.256(ptr %ptr, <4 x i64>, i8)
1972 define void @test_int_x86_avx512_mask_pmovus_qb_mem_256(ptr %ptr, <4 x i64> %x1, i8 %x2) {
1973 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_256:
1975 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1976 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1977 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1978 ; X86-NEXT: vpmovusqb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x12,0x00]
1979 ; X86-NEXT: vpmovusqb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x12,0x00]
1980 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1981 ; X86-NEXT: retl # encoding: [0xc3]
1983 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_256:
1985 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1986 ; X64-NEXT: vpmovusqb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x12,0x07]
1987 ; X64-NEXT: vpmovusqb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x12,0x07]
1988 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1989 ; X64-NEXT: retq # encoding: [0xc3]
1990 call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(ptr %ptr, <4 x i64> %x1, i8 -1)
1991 call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(ptr %ptr, <4 x i64> %x1, i8 %x2)
1995 declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64>, <8 x i16>, i8)
1997 define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) {
1998 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qw_128:
2000 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2001 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2002 ; X86-NEXT: vpmovqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x34,0xc2]
2003 ; X86-NEXT: vpmovqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x34,0xc1]
2004 ; X86-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
2005 ; X86-NEXT: vpmovqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x34,0xc0]
2006 ; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2007 ; X86-NEXT: retl # encoding: [0xc3]
2009 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qw_128:
2011 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2012 ; X64-NEXT: vpmovqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x34,0xc2]
2013 ; X64-NEXT: vpmovqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x34,0xc1]
2014 ; X64-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
2015 ; X64-NEXT: vpmovqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x34,0xc0]
2016 ; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2017 ; X64-NEXT: retq # encoding: [0xc3]
2018 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
2019 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
2020 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
2021 %res3 = add <8 x i16> %res0, %res1
2022 %res4 = add <8 x i16> %res3, %res2
2026 declare void @llvm.x86.avx512.mask.pmov.qw.mem.128(ptr %ptr, <2 x i64>, i8)
2028 define void @test_int_x86_avx512_mask_pmov_qw_mem_128(ptr %ptr, <2 x i64> %x1, i8 %x2) {
2029 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_128:
2031 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2032 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2033 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2034 ; X86-NEXT: vpmovqw %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x34,0x00]
2035 ; X86-NEXT: vpmovqw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x34,0x00]
2036 ; X86-NEXT: retl # encoding: [0xc3]
2038 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_128:
2040 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2041 ; X64-NEXT: vpmovqw %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x34,0x07]
2042 ; X64-NEXT: vpmovqw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x34,0x07]
2043 ; X64-NEXT: retq # encoding: [0xc3]
2044 call void @llvm.x86.avx512.mask.pmov.qw.mem.128(ptr %ptr, <2 x i64> %x1, i8 -1)
2045 call void @llvm.x86.avx512.mask.pmov.qw.mem.128(ptr %ptr, <2 x i64> %x1, i8 %x2)
2049 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64>, <8 x i16>, i8)
2051 define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) {
2052 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qw_128:
2054 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2055 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2056 ; X86-NEXT: vpmovsqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x24,0xc2]
2057 ; X86-NEXT: vpmovsqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x24,0xc1]
2058 ; X86-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
2059 ; X86-NEXT: vpmovsqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x24,0xc0]
2060 ; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2061 ; X86-NEXT: retl # encoding: [0xc3]
2063 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qw_128:
2065 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2066 ; X64-NEXT: vpmovsqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x24,0xc2]
2067 ; X64-NEXT: vpmovsqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x24,0xc1]
2068 ; X64-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
2069 ; X64-NEXT: vpmovsqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x24,0xc0]
2070 ; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2071 ; X64-NEXT: retq # encoding: [0xc3]
2072 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
2073 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
2074 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
2075 %res3 = add <8 x i16> %res0, %res1
2076 %res4 = add <8 x i16> %res3, %res2
2080 declare void @llvm.x86.avx512.mask.pmovs.qw.mem.128(ptr %ptr, <2 x i64>, i8)
2082 define void @test_int_x86_avx512_mask_pmovs_qw_mem_128(ptr %ptr, <2 x i64> %x1, i8 %x2) {
2083 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_128:
2085 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2086 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2087 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2088 ; X86-NEXT: vpmovsqw %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x24,0x00]
2089 ; X86-NEXT: vpmovsqw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x24,0x00]
2090 ; X86-NEXT: retl # encoding: [0xc3]
2092 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_128:
2094 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2095 ; X64-NEXT: vpmovsqw %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x24,0x07]
2096 ; X64-NEXT: vpmovsqw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x24,0x07]
2097 ; X64-NEXT: retq # encoding: [0xc3]
2098 call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(ptr %ptr, <2 x i64> %x1, i8 -1)
2099 call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(ptr %ptr, <2 x i64> %x1, i8 %x2)
2103 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64>, <8 x i16>, i8)
2105 define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) {
2106 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qw_128:
2108 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2109 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2110 ; X86-NEXT: vpmovusqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x14,0xc2]
2111 ; X86-NEXT: vpmovusqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x14,0xc1]
2112 ; X86-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
2113 ; X86-NEXT: vpmovusqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x14,0xc0]
2114 ; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2115 ; X86-NEXT: retl # encoding: [0xc3]
2117 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qw_128:
2119 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2120 ; X64-NEXT: vpmovusqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x14,0xc2]
2121 ; X64-NEXT: vpmovusqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x14,0xc1]
2122 ; X64-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
2123 ; X64-NEXT: vpmovusqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x14,0xc0]
2124 ; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2125 ; X64-NEXT: retq # encoding: [0xc3]
2126 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
2127 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
2128 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
2129 %res3 = add <8 x i16> %res0, %res1
2130 %res4 = add <8 x i16> %res3, %res2
2134 declare void @llvm.x86.avx512.mask.pmovus.qw.mem.128(ptr %ptr, <2 x i64>, i8)
2136 define void @test_int_x86_avx512_mask_pmovus_qw_mem_128(ptr %ptr, <2 x i64> %x1, i8 %x2) {
2137 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_128:
2139 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2140 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2141 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2142 ; X86-NEXT: vpmovusqw %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x14,0x00]
2143 ; X86-NEXT: vpmovusqw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x14,0x00]
2144 ; X86-NEXT: retl # encoding: [0xc3]
2146 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_128:
2148 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2149 ; X64-NEXT: vpmovusqw %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x14,0x07]
2150 ; X64-NEXT: vpmovusqw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x14,0x07]
2151 ; X64-NEXT: retq # encoding: [0xc3]
2152 call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(ptr %ptr, <2 x i64> %x1, i8 -1)
2153 call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(ptr %ptr, <2 x i64> %x1, i8 %x2)
2157 declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64>, <8 x i16>, i8)
2159 define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
2160 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qw_256:
2162 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2163 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2164 ; X86-NEXT: vpmovqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x34,0xc2]
2165 ; X86-NEXT: vpmovqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x34,0xc1]
2166 ; X86-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
2167 ; X86-NEXT: vpmovqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x34,0xc0]
2168 ; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2169 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2170 ; X86-NEXT: retl # encoding: [0xc3]
2172 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qw_256:
2174 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2175 ; X64-NEXT: vpmovqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x34,0xc2]
2176 ; X64-NEXT: vpmovqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x34,0xc1]
2177 ; X64-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
2178 ; X64-NEXT: vpmovqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x34,0xc0]
2179 ; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2180 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2181 ; X64-NEXT: retq # encoding: [0xc3]
2182 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
2183 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
2184 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
2185 %res3 = add <8 x i16> %res0, %res1
2186 %res4 = add <8 x i16> %res3, %res2
2190 declare void @llvm.x86.avx512.mask.pmov.qw.mem.256(ptr %ptr, <4 x i64>, i8)
2192 define void @test_int_x86_avx512_mask_pmov_qw_mem_256(ptr %ptr, <4 x i64> %x1, i8 %x2) {
2193 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_256:
2195 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2196 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2197 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2198 ; X86-NEXT: vpmovqw %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x34,0x00]
2199 ; X86-NEXT: vpmovqw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x34,0x00]
2200 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2201 ; X86-NEXT: retl # encoding: [0xc3]
2203 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_256:
2205 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2206 ; X64-NEXT: vpmovqw %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x34,0x07]
2207 ; X64-NEXT: vpmovqw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x34,0x07]
2208 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2209 ; X64-NEXT: retq # encoding: [0xc3]
2210 call void @llvm.x86.avx512.mask.pmov.qw.mem.256(ptr %ptr, <4 x i64> %x1, i8 -1)
2211 call void @llvm.x86.avx512.mask.pmov.qw.mem.256(ptr %ptr, <4 x i64> %x1, i8 %x2)
2215 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64>, <8 x i16>, i8)
2217 define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
2218 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qw_256:
2220 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2221 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2222 ; X86-NEXT: vpmovsqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x24,0xc2]
2223 ; X86-NEXT: vpmovsqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x24,0xc1]
2224 ; X86-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
2225 ; X86-NEXT: vpmovsqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x24,0xc0]
2226 ; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2227 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2228 ; X86-NEXT: retl # encoding: [0xc3]
2230 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qw_256:
2232 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2233 ; X64-NEXT: vpmovsqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x24,0xc2]
2234 ; X64-NEXT: vpmovsqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x24,0xc1]
2235 ; X64-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
2236 ; X64-NEXT: vpmovsqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x24,0xc0]
2237 ; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2238 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2239 ; X64-NEXT: retq # encoding: [0xc3]
2240 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
2241 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
2242 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
2243 %res3 = add <8 x i16> %res0, %res1
2244 %res4 = add <8 x i16> %res3, %res2
2248 declare void @llvm.x86.avx512.mask.pmovs.qw.mem.256(ptr %ptr, <4 x i64>, i8)
2250 define void @test_int_x86_avx512_mask_pmovs_qw_mem_256(ptr %ptr, <4 x i64> %x1, i8 %x2) {
2251 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_256:
2253 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2254 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2255 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2256 ; X86-NEXT: vpmovsqw %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x24,0x00]
2257 ; X86-NEXT: vpmovsqw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x24,0x00]
2258 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2259 ; X86-NEXT: retl # encoding: [0xc3]
2261 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_256:
2263 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2264 ; X64-NEXT: vpmovsqw %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x24,0x07]
2265 ; X64-NEXT: vpmovsqw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x24,0x07]
2266 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2267 ; X64-NEXT: retq # encoding: [0xc3]
2268 call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(ptr %ptr, <4 x i64> %x1, i8 -1)
2269 call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(ptr %ptr, <4 x i64> %x1, i8 %x2)
2273 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64>, <8 x i16>, i8)
2275 define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
2276 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qw_256:
2278 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2279 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2280 ; X86-NEXT: vpmovusqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x14,0xc2]
2281 ; X86-NEXT: vpmovusqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x14,0xc1]
2282 ; X86-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
2283 ; X86-NEXT: vpmovusqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x14,0xc0]
2284 ; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2285 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2286 ; X86-NEXT: retl # encoding: [0xc3]
2288 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qw_256:
2290 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2291 ; X64-NEXT: vpmovusqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x14,0xc2]
2292 ; X64-NEXT: vpmovusqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x14,0xc1]
2293 ; X64-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
2294 ; X64-NEXT: vpmovusqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x14,0xc0]
2295 ; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
2296 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2297 ; X64-NEXT: retq # encoding: [0xc3]
2298 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
2299 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
2300 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
2301 %res3 = add <8 x i16> %res0, %res1
2302 %res4 = add <8 x i16> %res3, %res2
2306 declare void @llvm.x86.avx512.mask.pmovus.qw.mem.256(ptr %ptr, <4 x i64>, i8)
2308 define void @test_int_x86_avx512_mask_pmovus_qw_mem_256(ptr %ptr, <4 x i64> %x1, i8 %x2) {
2309 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_256:
2311 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2312 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2313 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2314 ; X86-NEXT: vpmovusqw %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x14,0x00]
2315 ; X86-NEXT: vpmovusqw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x14,0x00]
2316 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2317 ; X86-NEXT: retl # encoding: [0xc3]
2319 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_256:
2321 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2322 ; X64-NEXT: vpmovusqw %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x14,0x07]
2323 ; X64-NEXT: vpmovusqw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x14,0x07]
2324 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2325 ; X64-NEXT: retq # encoding: [0xc3]
2326 call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(ptr %ptr, <4 x i64> %x1, i8 -1)
2327 call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(ptr %ptr, <4 x i64> %x1, i8 %x2)
2331 declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64>, <4 x i32>, i8)
2333 define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
2334 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qd_128:
2336 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2337 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2338 ; X86-NEXT: vpmovqd %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x35,0xc2]
2339 ; X86-NEXT: vpmovqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x35,0xc1]
2340 ; X86-NEXT: vpaddd %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc9]
2341 ; X86-NEXT: vpmovqd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x35,0xc0]
2342 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
2343 ; X86-NEXT: retl # encoding: [0xc3]
2345 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qd_128:
2347 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2348 ; X64-NEXT: vpmovqd %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x35,0xc2]
2349 ; X64-NEXT: vpmovqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x35,0xc1]
2350 ; X64-NEXT: vpaddd %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc9]
2351 ; X64-NEXT: vpmovqd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x35,0xc0]
2352 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
2353 ; X64-NEXT: retq # encoding: [0xc3]
2354 %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
2355 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
2356 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
2357 %res3 = add <4 x i32> %res0, %res1
2358 %res4 = add <4 x i32> %res3, %res2
2362 declare void @llvm.x86.avx512.mask.pmov.qd.mem.128(ptr %ptr, <2 x i64>, i8)
2364 define void @test_int_x86_avx512_mask_pmov_qd_mem_128(ptr %ptr, <2 x i64> %x1, i8 %x2) {
2365 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_128:
2367 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2368 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2369 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2370 ; X86-NEXT: vpmovqd %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x35,0x00]
2371 ; X86-NEXT: vpmovqd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x35,0x00]
2372 ; X86-NEXT: retl # encoding: [0xc3]
2374 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_128:
2376 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2377 ; X64-NEXT: vpmovqd %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x35,0x07]
2378 ; X64-NEXT: vpmovqd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x35,0x07]
2379 ; X64-NEXT: retq # encoding: [0xc3]
2380 call void @llvm.x86.avx512.mask.pmov.qd.mem.128(ptr %ptr, <2 x i64> %x1, i8 -1)
2381 call void @llvm.x86.avx512.mask.pmov.qd.mem.128(ptr %ptr, <2 x i64> %x1, i8 %x2)
2385 declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64>, <4 x i32>, i8)
2387 define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
2388 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qd_128:
2390 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2391 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2392 ; X86-NEXT: vpmovsqd %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x25,0xc2]
2393 ; X86-NEXT: vpmovsqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x25,0xc1]
2394 ; X86-NEXT: vpaddd %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc9]
2395 ; X86-NEXT: vpmovsqd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x25,0xc0]
2396 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
2397 ; X86-NEXT: retl # encoding: [0xc3]
2399 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qd_128:
2401 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2402 ; X64-NEXT: vpmovsqd %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x25,0xc2]
2403 ; X64-NEXT: vpmovsqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x25,0xc1]
2404 ; X64-NEXT: vpaddd %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc9]
2405 ; X64-NEXT: vpmovsqd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x25,0xc0]
2406 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
2407 ; X64-NEXT: retq # encoding: [0xc3]
2408 %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
2409 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
2410 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
2411 %res3 = add <4 x i32> %res0, %res1
2412 %res4 = add <4 x i32> %res3, %res2
2416 declare void @llvm.x86.avx512.mask.pmovs.qd.mem.128(ptr %ptr, <2 x i64>, i8)
2418 define void @test_int_x86_avx512_mask_pmovs_qd_mem_128(ptr %ptr, <2 x i64> %x1, i8 %x2) {
2419 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_128:
2421 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2422 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2423 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2424 ; X86-NEXT: vpmovsqd %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x25,0x00]
2425 ; X86-NEXT: vpmovsqd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x25,0x00]
2426 ; X86-NEXT: retl # encoding: [0xc3]
2428 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_128:
2430 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2431 ; X64-NEXT: vpmovsqd %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x25,0x07]
2432 ; X64-NEXT: vpmovsqd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x25,0x07]
2433 ; X64-NEXT: retq # encoding: [0xc3]
2434 call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(ptr %ptr, <2 x i64> %x1, i8 -1)
2435 call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(ptr %ptr, <2 x i64> %x1, i8 %x2)
2439 declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64>, <4 x i32>, i8)
2441 define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
2442 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qd_128:
2444 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2445 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2446 ; X86-NEXT: vpmovusqd %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x15,0xc2]
2447 ; X86-NEXT: vpmovusqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x15,0xc1]
2448 ; X86-NEXT: vpaddd %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc9]
2449 ; X86-NEXT: vpmovusqd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x15,0xc0]
2450 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
2451 ; X86-NEXT: retl # encoding: [0xc3]
2453 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qd_128:
2455 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2456 ; X64-NEXT: vpmovusqd %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x15,0xc2]
2457 ; X64-NEXT: vpmovusqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x15,0xc1]
2458 ; X64-NEXT: vpaddd %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc9]
2459 ; X64-NEXT: vpmovusqd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x15,0xc0]
2460 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
2461 ; X64-NEXT: retq # encoding: [0xc3]
2462 %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
2463 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
2464 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
2465 %res3 = add <4 x i32> %res0, %res1
2466 %res4 = add <4 x i32> %res3, %res2
2470 declare void @llvm.x86.avx512.mask.pmovus.qd.mem.128(ptr %ptr, <2 x i64>, i8)
2472 define void @test_int_x86_avx512_mask_pmovus_qd_mem_128(ptr %ptr, <2 x i64> %x1, i8 %x2) {
2473 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_128:
2475 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2476 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2477 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2478 ; X86-NEXT: vpmovusqd %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x15,0x00]
2479 ; X86-NEXT: vpmovusqd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x15,0x00]
2480 ; X86-NEXT: retl # encoding: [0xc3]
2482 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_128:
2484 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2485 ; X64-NEXT: vpmovusqd %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x15,0x07]
2486 ; X64-NEXT: vpmovusqd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x15,0x07]
2487 ; X64-NEXT: retq # encoding: [0xc3]
2488 call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(ptr %ptr, <2 x i64> %x1, i8 -1)
2489 call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(ptr %ptr, <2 x i64> %x1, i8 %x2)
2493 define <4 x i32>@test_int_x86_avx512_pmov_qd_256(<4 x i64> %x0) {
2494 ; CHECK-LABEL: test_int_x86_avx512_pmov_qd_256:
2496 ; CHECK-NEXT: vpmovqd %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x35,0xc0]
2497 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2498 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2499 %1 = trunc <4 x i64> %x0 to <4 x i32>
2503 define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
2504 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qd_256:
2506 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2507 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2508 ; X86-NEXT: vpmovqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x35,0xc1]
2509 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2510 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2511 ; X86-NEXT: retl # encoding: [0xc3]
2513 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qd_256:
2515 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2516 ; X64-NEXT: vpmovqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x35,0xc1]
2517 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2518 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2519 ; X64-NEXT: retq # encoding: [0xc3]
2520 %1 = trunc <4 x i64> %x0 to <4 x i32>
2521 %2 = bitcast i8 %x2 to <8 x i1>
2522 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2523 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x1
2527 define <4 x i32>@test_int_x86_avx512_maskz_pmov_qd_256(<4 x i64> %x0, i8 %x2) {
2528 ; X86-LABEL: test_int_x86_avx512_maskz_pmov_qd_256:
2530 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2531 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2532 ; X86-NEXT: vpmovqd %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x35,0xc0]
2533 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2534 ; X86-NEXT: retl # encoding: [0xc3]
2536 ; X64-LABEL: test_int_x86_avx512_maskz_pmov_qd_256:
2538 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2539 ; X64-NEXT: vpmovqd %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x35,0xc0]
2540 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2541 ; X64-NEXT: retq # encoding: [0xc3]
2542 %1 = trunc <4 x i64> %x0 to <4 x i32>
2543 %2 = bitcast i8 %x2 to <8 x i1>
2544 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2545 %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> zeroinitializer
2549 declare void @llvm.x86.avx512.mask.pmov.qd.mem.256(ptr %ptr, <4 x i64>, i8)
2551 define void @test_int_x86_avx512_mask_pmov_qd_mem_256(ptr %ptr, <4 x i64> %x1, i8 %x2) {
2552 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_256:
2554 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2555 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2556 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2557 ; X86-NEXT: vpmovqd %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x35,0x00]
2558 ; X86-NEXT: vpmovqd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x35,0x00]
2559 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2560 ; X86-NEXT: retl # encoding: [0xc3]
2562 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_256:
2564 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2565 ; X64-NEXT: vpmovqd %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x35,0x07]
2566 ; X64-NEXT: vpmovqd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x35,0x07]
2567 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2568 ; X64-NEXT: retq # encoding: [0xc3]
2569 call void @llvm.x86.avx512.mask.pmov.qd.mem.256(ptr %ptr, <4 x i64> %x1, i8 -1)
2570 call void @llvm.x86.avx512.mask.pmov.qd.mem.256(ptr %ptr, <4 x i64> %x1, i8 %x2)
2574 declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64>, <4 x i32>, i8)
2576 define <4 x i32>@test_int_x86_avx512_pmovs_qd_256(<4 x i64> %x0, <4 x i32> %x1) {
2577 ; CHECK-LABEL: test_int_x86_avx512_pmovs_qd_256:
2579 ; CHECK-NEXT: vpmovsqd %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x25,0xc0]
2580 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2581 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2582 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
2586 define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
2587 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qd_256:
2589 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2590 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2591 ; X86-NEXT: vpmovsqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x25,0xc1]
2592 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2593 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2594 ; X86-NEXT: retl # encoding: [0xc3]
2596 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qd_256:
2598 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2599 ; X64-NEXT: vpmovsqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x25,0xc1]
2600 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2601 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2602 ; X64-NEXT: retq # encoding: [0xc3]
2603 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
2607 define <4 x i32>@test_int_x86_avx512_maskz_pmovs_qd_256(<4 x i64> %x0, i8 %x2) {
2608 ; X86-LABEL: test_int_x86_avx512_maskz_pmovs_qd_256:
2610 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2611 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2612 ; X86-NEXT: vpmovsqd %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x25,0xc0]
2613 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2614 ; X86-NEXT: retl # encoding: [0xc3]
2616 ; X64-LABEL: test_int_x86_avx512_maskz_pmovs_qd_256:
2618 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2619 ; X64-NEXT: vpmovsqd %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x25,0xc0]
2620 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2621 ; X64-NEXT: retq # encoding: [0xc3]
2622 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
2626 declare void @llvm.x86.avx512.mask.pmovs.qd.mem.256(ptr %ptr, <4 x i64>, i8)
2628 define void @test_int_x86_avx512_mask_pmovs_qd_mem_256(ptr %ptr, <4 x i64> %x1, i8 %x2) {
2629 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_256:
2631 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2632 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2633 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2634 ; X86-NEXT: vpmovsqd %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x25,0x00]
2635 ; X86-NEXT: vpmovsqd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x25,0x00]
2636 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2637 ; X86-NEXT: retl # encoding: [0xc3]
2639 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_256:
2641 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2642 ; X64-NEXT: vpmovsqd %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x25,0x07]
2643 ; X64-NEXT: vpmovsqd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x25,0x07]
2644 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2645 ; X64-NEXT: retq # encoding: [0xc3]
2646 call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(ptr %ptr, <4 x i64> %x1, i8 -1)
2647 call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(ptr %ptr, <4 x i64> %x1, i8 %x2)
2651 declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64>, <4 x i32>, i8)
2653 define <4 x i32>@test_int_x86_avx512_pmovus_qd_256(<4 x i64> %x0, <4 x i32> %x1) {
2654 ; CHECK-LABEL: test_int_x86_avx512_pmovus_qd_256:
2656 ; CHECK-NEXT: vpmovusqd %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x15,0xc0]
2657 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2658 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2659 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
2663 define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
2664 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qd_256:
2666 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2667 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2668 ; X86-NEXT: vpmovusqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x15,0xc1]
2669 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2670 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2671 ; X86-NEXT: retl # encoding: [0xc3]
2673 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qd_256:
2675 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2676 ; X64-NEXT: vpmovusqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x15,0xc1]
2677 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2678 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2679 ; X64-NEXT: retq # encoding: [0xc3]
2680 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
2684 define <4 x i32>@test_int_x86_avx512_maskz_pmovus_qd_256(<4 x i64> %x0, i8 %x2) {
2685 ; X86-LABEL: test_int_x86_avx512_maskz_pmovus_qd_256:
2687 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2688 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2689 ; X86-NEXT: vpmovusqd %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x15,0xc0]
2690 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2691 ; X86-NEXT: retl # encoding: [0xc3]
2693 ; X64-LABEL: test_int_x86_avx512_maskz_pmovus_qd_256:
2695 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2696 ; X64-NEXT: vpmovusqd %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x15,0xc0]
2697 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2698 ; X64-NEXT: retq # encoding: [0xc3]
2699 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
2703 declare void @llvm.x86.avx512.mask.pmovus.qd.mem.256(ptr %ptr, <4 x i64>, i8)
2705 define void @test_int_x86_avx512_mask_pmovus_qd_mem_256(ptr %ptr, <4 x i64> %x1, i8 %x2) {
2706 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_256:
2708 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2709 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2710 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2711 ; X86-NEXT: vpmovusqd %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x15,0x00]
2712 ; X86-NEXT: vpmovusqd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x15,0x00]
2713 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2714 ; X86-NEXT: retl # encoding: [0xc3]
2716 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_256:
2718 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2719 ; X64-NEXT: vpmovusqd %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x15,0x07]
2720 ; X64-NEXT: vpmovusqd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x15,0x07]
2721 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2722 ; X64-NEXT: retq # encoding: [0xc3]
2723 call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(ptr %ptr, <4 x i64> %x1, i8 -1)
2724 call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(ptr %ptr, <4 x i64> %x1, i8 %x2)
2728 declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32>, <16 x i8>, i8)
2730 define <16 x i8>@test_int_x86_avx512_mask_pmov_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
2731 ; X86-LABEL: test_int_x86_avx512_mask_pmov_db_128:
2733 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2734 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2735 ; X86-NEXT: vpmovdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x31,0xc2]
2736 ; X86-NEXT: vpmovdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x31,0xc1]
2737 ; X86-NEXT: vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
2738 ; X86-NEXT: vpmovdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x31,0xc0]
2739 ; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2740 ; X86-NEXT: retl # encoding: [0xc3]
2742 ; X64-LABEL: test_int_x86_avx512_mask_pmov_db_128:
2744 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2745 ; X64-NEXT: vpmovdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x31,0xc2]
2746 ; X64-NEXT: vpmovdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x31,0xc1]
2747 ; X64-NEXT: vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
2748 ; X64-NEXT: vpmovdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x31,0xc0]
2749 ; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2750 ; X64-NEXT: retq # encoding: [0xc3]
2751 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
2752 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
2753 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
2754 %res3 = add <16 x i8> %res0, %res1
2755 %res4 = add <16 x i8> %res3, %res2
2759 declare void @llvm.x86.avx512.mask.pmov.db.mem.128(ptr %ptr, <4 x i32>, i8)
2761 define void @test_int_x86_avx512_mask_pmov_db_mem_128(ptr %ptr, <4 x i32> %x1, i8 %x2) {
2762 ; X86-LABEL: test_int_x86_avx512_mask_pmov_db_mem_128:
2764 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2765 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2766 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2767 ; X86-NEXT: vpmovdb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x31,0x00]
2768 ; X86-NEXT: vpmovdb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x31,0x00]
2769 ; X86-NEXT: retl # encoding: [0xc3]
2771 ; X64-LABEL: test_int_x86_avx512_mask_pmov_db_mem_128:
2773 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2774 ; X64-NEXT: vpmovdb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x31,0x07]
2775 ; X64-NEXT: vpmovdb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x31,0x07]
2776 ; X64-NEXT: retq # encoding: [0xc3]
2777 call void @llvm.x86.avx512.mask.pmov.db.mem.128(ptr %ptr, <4 x i32> %x1, i8 -1)
2778 call void @llvm.x86.avx512.mask.pmov.db.mem.128(ptr %ptr, <4 x i32> %x1, i8 %x2)
2782 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32>, <16 x i8>, i8)
2784 define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
2785 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_db_128:
2787 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2788 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2789 ; X86-NEXT: vpmovsdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x21,0xc2]
2790 ; X86-NEXT: vpmovsdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x21,0xc1]
2791 ; X86-NEXT: vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
2792 ; X86-NEXT: vpmovsdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x21,0xc0]
2793 ; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2794 ; X86-NEXT: retl # encoding: [0xc3]
2796 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_db_128:
2798 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2799 ; X64-NEXT: vpmovsdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x21,0xc2]
2800 ; X64-NEXT: vpmovsdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x21,0xc1]
2801 ; X64-NEXT: vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
2802 ; X64-NEXT: vpmovsdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x21,0xc0]
2803 ; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2804 ; X64-NEXT: retq # encoding: [0xc3]
2805 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
2806 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
2807 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
2808 %res3 = add <16 x i8> %res0, %res1
2809 %res4 = add <16 x i8> %res3, %res2
2813 declare void @llvm.x86.avx512.mask.pmovs.db.mem.128(ptr %ptr, <4 x i32>, i8)
2815 define void @test_int_x86_avx512_mask_pmovs_db_mem_128(ptr %ptr, <4 x i32> %x1, i8 %x2) {
2816 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_128:
2818 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2819 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2820 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2821 ; X86-NEXT: vpmovsdb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x21,0x00]
2822 ; X86-NEXT: vpmovsdb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x21,0x00]
2823 ; X86-NEXT: retl # encoding: [0xc3]
2825 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_128:
2827 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2828 ; X64-NEXT: vpmovsdb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x21,0x07]
2829 ; X64-NEXT: vpmovsdb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x21,0x07]
2830 ; X64-NEXT: retq # encoding: [0xc3]
2831 call void @llvm.x86.avx512.mask.pmovs.db.mem.128(ptr %ptr, <4 x i32> %x1, i8 -1)
2832 call void @llvm.x86.avx512.mask.pmovs.db.mem.128(ptr %ptr, <4 x i32> %x1, i8 %x2)
2836 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32>, <16 x i8>, i8)
2838 define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
2839 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_db_128:
2841 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2842 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2843 ; X86-NEXT: vpmovusdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x11,0xc2]
2844 ; X86-NEXT: vpmovusdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x11,0xc1]
2845 ; X86-NEXT: vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
2846 ; X86-NEXT: vpmovusdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x11,0xc0]
2847 ; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2848 ; X86-NEXT: retl # encoding: [0xc3]
2850 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_db_128:
2852 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2853 ; X64-NEXT: vpmovusdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x11,0xc2]
2854 ; X64-NEXT: vpmovusdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x11,0xc1]
2855 ; X64-NEXT: vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
2856 ; X64-NEXT: vpmovusdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x11,0xc0]
2857 ; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2858 ; X64-NEXT: retq # encoding: [0xc3]
2859 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
2860 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
2861 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
2862 %res3 = add <16 x i8> %res0, %res1
2863 %res4 = add <16 x i8> %res3, %res2
2867 declare void @llvm.x86.avx512.mask.pmovus.db.mem.128(ptr %ptr, <4 x i32>, i8)
2869 define void @test_int_x86_avx512_mask_pmovus_db_mem_128(ptr %ptr, <4 x i32> %x1, i8 %x2) {
2870 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_128:
2872 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2873 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2874 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2875 ; X86-NEXT: vpmovusdb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x11,0x00]
2876 ; X86-NEXT: vpmovusdb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x11,0x00]
2877 ; X86-NEXT: retl # encoding: [0xc3]
2879 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_128:
2881 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2882 ; X64-NEXT: vpmovusdb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x11,0x07]
2883 ; X64-NEXT: vpmovusdb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x11,0x07]
2884 ; X64-NEXT: retq # encoding: [0xc3]
2885 call void @llvm.x86.avx512.mask.pmovus.db.mem.128(ptr %ptr, <4 x i32> %x1, i8 -1)
2886 call void @llvm.x86.avx512.mask.pmovus.db.mem.128(ptr %ptr, <4 x i32> %x1, i8 %x2)
2890 declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32>, <16 x i8>, i8)
2892 define <16 x i8>@test_int_x86_avx512_mask_pmov_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
2893 ; X86-LABEL: test_int_x86_avx512_mask_pmov_db_256:
2895 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2896 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2897 ; X86-NEXT: vpmovdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x31,0xc2]
2898 ; X86-NEXT: vpmovdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x31,0xc1]
2899 ; X86-NEXT: vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
2900 ; X86-NEXT: vpmovdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x31,0xc0]
2901 ; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2902 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2903 ; X86-NEXT: retl # encoding: [0xc3]
2905 ; X64-LABEL: test_int_x86_avx512_mask_pmov_db_256:
2907 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2908 ; X64-NEXT: vpmovdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x31,0xc2]
2909 ; X64-NEXT: vpmovdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x31,0xc1]
2910 ; X64-NEXT: vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
2911 ; X64-NEXT: vpmovdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x31,0xc0]
2912 ; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2913 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2914 ; X64-NEXT: retq # encoding: [0xc3]
2915 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
2916 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2)
2917 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
2918 %res3 = add <16 x i8> %res0, %res1
2919 %res4 = add <16 x i8> %res3, %res2
2923 declare void @llvm.x86.avx512.mask.pmov.db.mem.256(ptr %ptr, <8 x i32>, i8)
2925 define void @test_int_x86_avx512_mask_pmov_db_mem_256(ptr %ptr, <8 x i32> %x1, i8 %x2) {
2926 ; X86-LABEL: test_int_x86_avx512_mask_pmov_db_mem_256:
2928 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2929 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2930 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2931 ; X86-NEXT: vpmovdb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x31,0x00]
2932 ; X86-NEXT: vpmovdb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x31,0x00]
2933 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2934 ; X86-NEXT: retl # encoding: [0xc3]
2936 ; X64-LABEL: test_int_x86_avx512_mask_pmov_db_mem_256:
2938 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2939 ; X64-NEXT: vpmovdb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x31,0x07]
2940 ; X64-NEXT: vpmovdb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x31,0x07]
2941 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2942 ; X64-NEXT: retq # encoding: [0xc3]
2943 call void @llvm.x86.avx512.mask.pmov.db.mem.256(ptr %ptr, <8 x i32> %x1, i8 -1)
2944 call void @llvm.x86.avx512.mask.pmov.db.mem.256(ptr %ptr, <8 x i32> %x1, i8 %x2)
2948 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32>, <16 x i8>, i8)
2950 define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
2951 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_db_256:
2953 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2954 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2955 ; X86-NEXT: vpmovsdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x21,0xc2]
2956 ; X86-NEXT: vpmovsdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x21,0xc1]
2957 ; X86-NEXT: vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
2958 ; X86-NEXT: vpmovsdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x21,0xc0]
2959 ; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2960 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2961 ; X86-NEXT: retl # encoding: [0xc3]
2963 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_db_256:
2965 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2966 ; X64-NEXT: vpmovsdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x21,0xc2]
2967 ; X64-NEXT: vpmovsdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x21,0xc1]
2968 ; X64-NEXT: vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
2969 ; X64-NEXT: vpmovsdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x21,0xc0]
2970 ; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
2971 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2972 ; X64-NEXT: retq # encoding: [0xc3]
2973 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
2974 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2)
2975 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
2976 %res3 = add <16 x i8> %res0, %res1
2977 %res4 = add <16 x i8> %res3, %res2
2981 declare void @llvm.x86.avx512.mask.pmovs.db.mem.256(ptr %ptr, <8 x i32>, i8)
2983 define void @test_int_x86_avx512_mask_pmovs_db_mem_256(ptr %ptr, <8 x i32> %x1, i8 %x2) {
2984 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_256:
2986 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
2987 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2988 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2989 ; X86-NEXT: vpmovsdb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x21,0x00]
2990 ; X86-NEXT: vpmovsdb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x21,0x00]
2991 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2992 ; X86-NEXT: retl # encoding: [0xc3]
2994 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_256:
2996 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2997 ; X64-NEXT: vpmovsdb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x21,0x07]
2998 ; X64-NEXT: vpmovsdb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x21,0x07]
2999 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3000 ; X64-NEXT: retq # encoding: [0xc3]
3001 call void @llvm.x86.avx512.mask.pmovs.db.mem.256(ptr %ptr, <8 x i32> %x1, i8 -1)
3002 call void @llvm.x86.avx512.mask.pmovs.db.mem.256(ptr %ptr, <8 x i32> %x1, i8 %x2)
3006 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32>, <16 x i8>, i8)
3008 define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
3009 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_db_256:
3011 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3012 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3013 ; X86-NEXT: vpmovusdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x11,0xc2]
3014 ; X86-NEXT: vpmovusdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x11,0xc1]
3015 ; X86-NEXT: vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
3016 ; X86-NEXT: vpmovusdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x11,0xc0]
3017 ; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
3018 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3019 ; X86-NEXT: retl # encoding: [0xc3]
3021 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_db_256:
3023 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3024 ; X64-NEXT: vpmovusdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x11,0xc2]
3025 ; X64-NEXT: vpmovusdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x11,0xc1]
3026 ; X64-NEXT: vpaddb %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfc,0xc9]
3027 ; X64-NEXT: vpmovusdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x11,0xc0]
3028 ; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
3029 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3030 ; X64-NEXT: retq # encoding: [0xc3]
3031 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
3032 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2)
3033 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
3034 %res3 = add <16 x i8> %res0, %res1
3035 %res4 = add <16 x i8> %res3, %res2
3039 declare void @llvm.x86.avx512.mask.pmovus.db.mem.256(ptr %ptr, <8 x i32>, i8)
3041 define void @test_int_x86_avx512_mask_pmovus_db_mem_256(ptr %ptr, <8 x i32> %x1, i8 %x2) {
3042 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_256:
3044 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3045 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3046 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3047 ; X86-NEXT: vpmovusdb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x11,0x00]
3048 ; X86-NEXT: vpmovusdb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x11,0x00]
3049 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3050 ; X86-NEXT: retl # encoding: [0xc3]
3052 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_256:
3054 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3055 ; X64-NEXT: vpmovusdb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x11,0x07]
3056 ; X64-NEXT: vpmovusdb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x11,0x07]
3057 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3058 ; X64-NEXT: retq # encoding: [0xc3]
3059 call void @llvm.x86.avx512.mask.pmovus.db.mem.256(ptr %ptr, <8 x i32> %x1, i8 -1)
3060 call void @llvm.x86.avx512.mask.pmovus.db.mem.256(ptr %ptr, <8 x i32> %x1, i8 %x2)
3064 declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32>, <8 x i16>, i8)
3066 define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3067 ; X86-LABEL: test_int_x86_avx512_mask_pmov_dw_128:
3069 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3070 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3071 ; X86-NEXT: vpmovdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x33,0xc2]
3072 ; X86-NEXT: vpmovdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x33,0xc1]
3073 ; X86-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
3074 ; X86-NEXT: vpmovdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x33,0xc0]
3075 ; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3076 ; X86-NEXT: retl # encoding: [0xc3]
3078 ; X64-LABEL: test_int_x86_avx512_mask_pmov_dw_128:
3080 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3081 ; X64-NEXT: vpmovdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x33,0xc2]
3082 ; X64-NEXT: vpmovdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x33,0xc1]
3083 ; X64-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
3084 ; X64-NEXT: vpmovdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x33,0xc0]
3085 ; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3086 ; X64-NEXT: retq # encoding: [0xc3]
3087 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
3088 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
3089 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3090 %res3 = add <8 x i16> %res0, %res1
3091 %res4 = add <8 x i16> %res3, %res2
3095 declare void @llvm.x86.avx512.mask.pmov.dw.mem.128(ptr %ptr, <4 x i32>, i8)
3097 define void @test_int_x86_avx512_mask_pmov_dw_mem_128(ptr %ptr, <4 x i32> %x1, i8 %x2) {
3098 ; X86-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_128:
3100 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3101 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3102 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3103 ; X86-NEXT: vpmovdw %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x33,0x00]
3104 ; X86-NEXT: vpmovdw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x33,0x00]
3105 ; X86-NEXT: retl # encoding: [0xc3]
3107 ; X64-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_128:
3109 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3110 ; X64-NEXT: vpmovdw %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x33,0x07]
3111 ; X64-NEXT: vpmovdw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x33,0x07]
3112 ; X64-NEXT: retq # encoding: [0xc3]
3113 call void @llvm.x86.avx512.mask.pmov.dw.mem.128(ptr %ptr, <4 x i32> %x1, i8 -1)
3114 call void @llvm.x86.avx512.mask.pmov.dw.mem.128(ptr %ptr, <4 x i32> %x1, i8 %x2)
3118 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32>, <8 x i16>, i8)
3120 define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3121 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_dw_128:
3123 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3124 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3125 ; X86-NEXT: vpmovsdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x23,0xc2]
3126 ; X86-NEXT: vpmovsdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x23,0xc1]
3127 ; X86-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
3128 ; X86-NEXT: vpmovsdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x23,0xc0]
3129 ; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3130 ; X86-NEXT: retl # encoding: [0xc3]
3132 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_dw_128:
3134 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3135 ; X64-NEXT: vpmovsdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x23,0xc2]
3136 ; X64-NEXT: vpmovsdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x23,0xc1]
3137 ; X64-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
3138 ; X64-NEXT: vpmovsdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x23,0xc0]
3139 ; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3140 ; X64-NEXT: retq # encoding: [0xc3]
3141 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
3142 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
3143 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3144 %res3 = add <8 x i16> %res0, %res1
3145 %res4 = add <8 x i16> %res3, %res2
3149 declare void @llvm.x86.avx512.mask.pmovs.dw.mem.128(ptr %ptr, <4 x i32>, i8)
3151 define void @test_int_x86_avx512_mask_pmovs_dw_mem_128(ptr %ptr, <4 x i32> %x1, i8 %x2) {
3152 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_128:
3154 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3155 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3156 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3157 ; X86-NEXT: vpmovsdw %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x23,0x00]
3158 ; X86-NEXT: vpmovsdw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x23,0x00]
3159 ; X86-NEXT: retl # encoding: [0xc3]
3161 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_128:
3163 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3164 ; X64-NEXT: vpmovsdw %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x23,0x07]
3165 ; X64-NEXT: vpmovsdw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x23,0x07]
3166 ; X64-NEXT: retq # encoding: [0xc3]
3167 call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(ptr %ptr, <4 x i32> %x1, i8 -1)
3168 call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(ptr %ptr, <4 x i32> %x1, i8 %x2)
3172 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32>, <8 x i16>, i8)
3174 define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3175 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_dw_128:
3177 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3178 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3179 ; X86-NEXT: vpmovusdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x13,0xc2]
3180 ; X86-NEXT: vpmovusdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x13,0xc1]
3181 ; X86-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
3182 ; X86-NEXT: vpmovusdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x13,0xc0]
3183 ; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3184 ; X86-NEXT: retl # encoding: [0xc3]
3186 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_dw_128:
3188 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3189 ; X64-NEXT: vpmovusdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x13,0xc2]
3190 ; X64-NEXT: vpmovusdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x13,0xc1]
3191 ; X64-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
3192 ; X64-NEXT: vpmovusdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x13,0xc0]
3193 ; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3194 ; X64-NEXT: retq # encoding: [0xc3]
3195 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
3196 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
3197 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3198 %res3 = add <8 x i16> %res0, %res1
3199 %res4 = add <8 x i16> %res3, %res2
3203 declare void @llvm.x86.avx512.mask.pmovus.dw.mem.128(ptr %ptr, <4 x i32>, i8)
3205 define void @test_int_x86_avx512_mask_pmovus_dw_mem_128(ptr %ptr, <4 x i32> %x1, i8 %x2) {
3206 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_128:
3208 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3209 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3210 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3211 ; X86-NEXT: vpmovusdw %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x13,0x00]
3212 ; X86-NEXT: vpmovusdw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x13,0x00]
3213 ; X86-NEXT: retl # encoding: [0xc3]
3215 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_128:
3217 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3218 ; X64-NEXT: vpmovusdw %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x13,0x07]
3219 ; X64-NEXT: vpmovusdw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x13,0x07]
3220 ; X64-NEXT: retq # encoding: [0xc3]
3221 call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(ptr %ptr, <4 x i32> %x1, i8 -1)
3222 call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(ptr %ptr, <4 x i32> %x1, i8 %x2)
3226 declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32>, <8 x i16>, i8)
3228 define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3229 ; X86-LABEL: test_int_x86_avx512_mask_pmov_dw_256:
3231 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3232 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3233 ; X86-NEXT: vpmovdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x33,0xc2]
3234 ; X86-NEXT: vpmovdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x33,0xc1]
3235 ; X86-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
3236 ; X86-NEXT: vpmovdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x33,0xc0]
3237 ; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3238 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3239 ; X86-NEXT: retl # encoding: [0xc3]
3241 ; X64-LABEL: test_int_x86_avx512_mask_pmov_dw_256:
3243 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3244 ; X64-NEXT: vpmovdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x33,0xc2]
3245 ; X64-NEXT: vpmovdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x33,0xc1]
3246 ; X64-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
3247 ; X64-NEXT: vpmovdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x33,0xc0]
3248 ; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3249 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3250 ; X64-NEXT: retq # encoding: [0xc3]
3251 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
3252 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
3253 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3254 %res3 = add <8 x i16> %res0, %res1
3255 %res4 = add <8 x i16> %res3, %res2
3259 declare void @llvm.x86.avx512.mask.pmov.dw.mem.256(ptr %ptr, <8 x i32>, i8)
3261 define void @test_int_x86_avx512_mask_pmov_dw_mem_256(ptr %ptr, <8 x i32> %x1, i8 %x2) {
3262 ; X86-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_256:
3264 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3265 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3266 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3267 ; X86-NEXT: vpmovdw %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x33,0x00]
3268 ; X86-NEXT: vpmovdw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x33,0x00]
3269 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3270 ; X86-NEXT: retl # encoding: [0xc3]
3272 ; X64-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_256:
3274 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3275 ; X64-NEXT: vpmovdw %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x33,0x07]
3276 ; X64-NEXT: vpmovdw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x33,0x07]
3277 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3278 ; X64-NEXT: retq # encoding: [0xc3]
3279 call void @llvm.x86.avx512.mask.pmov.dw.mem.256(ptr %ptr, <8 x i32> %x1, i8 -1)
3280 call void @llvm.x86.avx512.mask.pmov.dw.mem.256(ptr %ptr, <8 x i32> %x1, i8 %x2)
3284 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32>, <8 x i16>, i8)
3286 define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3287 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_dw_256:
3289 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3290 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3291 ; X86-NEXT: vpmovsdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x23,0xc2]
3292 ; X86-NEXT: vpmovsdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x23,0xc1]
3293 ; X86-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
3294 ; X86-NEXT: vpmovsdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x23,0xc0]
3295 ; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3296 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3297 ; X86-NEXT: retl # encoding: [0xc3]
3299 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_dw_256:
3301 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3302 ; X64-NEXT: vpmovsdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x23,0xc2]
3303 ; X64-NEXT: vpmovsdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x23,0xc1]
3304 ; X64-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
3305 ; X64-NEXT: vpmovsdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x23,0xc0]
3306 ; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3307 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3308 ; X64-NEXT: retq # encoding: [0xc3]
3309 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
3310 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
3311 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3312 %res3 = add <8 x i16> %res0, %res1
3313 %res4 = add <8 x i16> %res3, %res2
3317 declare void @llvm.x86.avx512.mask.pmovs.dw.mem.256(ptr %ptr, <8 x i32>, i8)
3319 define void @test_int_x86_avx512_mask_pmovs_dw_mem_256(ptr %ptr, <8 x i32> %x1, i8 %x2) {
3320 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_256:
3322 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3323 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3324 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3325 ; X86-NEXT: vpmovsdw %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x23,0x00]
3326 ; X86-NEXT: vpmovsdw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x23,0x00]
3327 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3328 ; X86-NEXT: retl # encoding: [0xc3]
3330 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_256:
3332 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3333 ; X64-NEXT: vpmovsdw %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x23,0x07]
3334 ; X64-NEXT: vpmovsdw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x23,0x07]
3335 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3336 ; X64-NEXT: retq # encoding: [0xc3]
3337 call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(ptr %ptr, <8 x i32> %x1, i8 -1)
3338 call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(ptr %ptr, <8 x i32> %x1, i8 %x2)
3342 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32>, <8 x i16>, i8)
3344 define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
3345 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_dw_256:
3347 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3348 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3349 ; X86-NEXT: vpmovusdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x13,0xc2]
3350 ; X86-NEXT: vpmovusdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x13,0xc1]
3351 ; X86-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
3352 ; X86-NEXT: vpmovusdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x13,0xc0]
3353 ; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3354 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3355 ; X86-NEXT: retl # encoding: [0xc3]
3357 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_dw_256:
3359 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3360 ; X64-NEXT: vpmovusdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x13,0xc2]
3361 ; X64-NEXT: vpmovusdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x13,0xc1]
3362 ; X64-NEXT: vpaddw %xmm1, %xmm2, %xmm1 # encoding: [0xc5,0xe9,0xfd,0xc9]
3363 ; X64-NEXT: vpmovusdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x13,0xc0]
3364 ; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
3365 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3366 ; X64-NEXT: retq # encoding: [0xc3]
3367 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
3368 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
3369 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
3370 %res3 = add <8 x i16> %res0, %res1
3371 %res4 = add <8 x i16> %res3, %res2
3375 declare void @llvm.x86.avx512.mask.pmovus.dw.mem.256(ptr %ptr, <8 x i32>, i8)
3377 define void @test_int_x86_avx512_mask_pmovus_dw_mem_256(ptr %ptr, <8 x i32> %x1, i8 %x2) {
3378 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_256:
3380 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3381 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3382 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3383 ; X86-NEXT: vpmovusdw %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x13,0x00]
3384 ; X86-NEXT: vpmovusdw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x13,0x00]
3385 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3386 ; X86-NEXT: retl # encoding: [0xc3]
3388 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_256:
3390 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3391 ; X64-NEXT: vpmovusdw %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x13,0x07]
3392 ; X64-NEXT: vpmovusdw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x13,0x07]
3393 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3394 ; X64-NEXT: retq # encoding: [0xc3]
3395 call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(ptr %ptr, <8 x i32> %x1, i8 -1)
3396 call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(ptr %ptr, <8 x i32> %x1, i8 %x2)
3400 declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double>, <4 x i32>, i8)
3402 define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3403 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_128:
3405 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3406 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3407 ; X86-NEXT: vcvtpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0xe6,0xc8]
3408 ; X86-NEXT: vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
3409 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3410 ; X86-NEXT: retl # encoding: [0xc3]
3412 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_128:
3414 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3415 ; X64-NEXT: vcvtpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0xe6,0xc8]
3416 ; X64-NEXT: vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
3417 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
3418 ; X64-NEXT: retq # encoding: [0xc3]
3419 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
3420 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
3421 %res2 = add <4 x i32> %res, %res1
3425 define <4 x i32>@test_int_x86_avx512_cvt_pd2dq_128_zext(<2 x double> %x0, <4 x i32> %x1) {
3426 ; CHECK-LABEL: test_int_x86_avx512_cvt_pd2dq_128_zext:
3428 ; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
3429 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3430 %res2 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
3431 %res3 = shufflevector <4 x i32> %res2, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3435 define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_128_zext(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3436 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_128_zext:
3438 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3439 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3440 ; X86-NEXT: vcvtpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0xe6,0xc8]
3441 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3442 ; X86-NEXT: retl # encoding: [0xc3]
3444 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_128_zext:
3446 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3447 ; X64-NEXT: vcvtpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0xe6,0xc8]
3448 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3449 ; X64-NEXT: retq # encoding: [0xc3]
3450 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
3451 %res1 = shufflevector <4 x i32> %res, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3455 declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double>, <4 x float>, i8)
3457 define <4 x float>@test_int_x86_avx512_cvt_pd2ps(<2 x double> %x0, <4 x float> %x1) {
3458 ; CHECK-LABEL: test_int_x86_avx512_cvt_pd2ps:
3460 ; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
3461 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3462 %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 -1)
3463 ret <4 x float> %res
3466 define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2) {
3467 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2ps:
3469 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3470 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3471 ; X86-NEXT: vcvtpd2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x5a,0xc8]
3472 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3473 ; X86-NEXT: retl # encoding: [0xc3]
3475 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2ps:
3477 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3478 ; X64-NEXT: vcvtpd2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x5a,0xc8]
3479 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3480 ; X64-NEXT: retq # encoding: [0xc3]
3481 %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2)
3482 ret <4 x float> %res
3485 define <4 x float>@test_int_x86_avx512_cvt_pd2ps_zext(<2 x double> %x0, <4 x float> %x1) {
3486 ; CHECK-LABEL: test_int_x86_avx512_cvt_pd2ps_zext:
3488 ; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
3489 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3490 %res2 = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 -1)
3491 %res3 = shufflevector <4 x float> %res2, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3492 ret <4 x float> %res3
3495 define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps_zext(<2 x double> %x0, <4 x float> %x1, i8 %x2) {
3496 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_zext:
3498 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3499 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3500 ; X86-NEXT: vcvtpd2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x5a,0xc8]
3501 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3502 ; X86-NEXT: retl # encoding: [0xc3]
3504 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_zext:
3506 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3507 ; X64-NEXT: vcvtpd2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x5a,0xc8]
3508 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3509 ; X64-NEXT: retq # encoding: [0xc3]
3510 %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2)
3511 %res1 = shufflevector <4 x float> %res, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3512 ret <4 x float> %res1
3515 declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double>, <4 x i32>, i8)
3517 define <4 x i32>@test_int_x86_avx512_cvt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1) {
3518 ; CHECK-LABEL: test_int_x86_avx512_cvt_pd2udq_128:
3520 ; CHECK-NEXT: vcvtpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x79,0xc0]
3521 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3522 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
3526 define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3527 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_128:
3529 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3530 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3531 ; X86-NEXT: vcvtpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x79,0xc8]
3532 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3533 ; X86-NEXT: retl # encoding: [0xc3]
3535 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_128:
3537 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3538 ; X64-NEXT: vcvtpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x79,0xc8]
3539 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3540 ; X64-NEXT: retq # encoding: [0xc3]
3541 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
3545 define <4 x i32>@test_int_x86_avx512_cvt_pd2udq_128_zext(<2 x double> %x0, <4 x i32> %x1) {
3546 ; CHECK-LABEL: test_int_x86_avx512_cvt_pd2udq_128_zext:
3548 ; CHECK-NEXT: vcvtpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x79,0xc0]
3549 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3550 %res2 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
3551 %res3 = shufflevector <4 x i32> %res2, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3555 define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_128_zext(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3556 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_128_zext:
3558 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3559 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3560 ; X86-NEXT: vcvtpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x79,0xc8]
3561 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3562 ; X86-NEXT: retl # encoding: [0xc3]
3564 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_128_zext:
3566 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3567 ; X64-NEXT: vcvtpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x79,0xc8]
3568 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3569 ; X64-NEXT: retq # encoding: [0xc3]
3570 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
3571 %res1 = shufflevector <4 x i32> %res, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3575 declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double>, <4 x i32>, i8)
3577 define <4 x i32>@test_int_x86_avx512_cvt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1) {
3578 ; CHECK-LABEL: test_int_x86_avx512_cvt_pd2udq_256:
3580 ; CHECK-NEXT: vcvtpd2udq %ymm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x28,0x79,0xc0]
3581 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3582 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3583 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
3587 define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
3588 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_256:
3590 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3591 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3592 ; X86-NEXT: vcvtpd2udq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x79,0xc8]
3593 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3594 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3595 ; X86-NEXT: retl # encoding: [0xc3]
3597 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_256:
3599 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3600 ; X64-NEXT: vcvtpd2udq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x79,0xc8]
3601 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3602 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3603 ; X64-NEXT: retq # encoding: [0xc3]
3604 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
3608 declare <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float>, <4 x i32>, i8)
3610 define <4 x i32>@test_int_x86_avx512_cvt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1) {
3611 ; CHECK-LABEL: test_int_x86_avx512_cvt_ps2dq_128:
3613 ; CHECK-NEXT: vcvtps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5b,0xc0]
3614 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3615 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
3619 define <4 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
3620 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_128:
3622 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3623 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3624 ; X86-NEXT: vcvtps2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x5b,0xc8]
3625 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3626 ; X86-NEXT: retl # encoding: [0xc3]
3628 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_128:
3630 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3631 ; X64-NEXT: vcvtps2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x5b,0xc8]
3632 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3633 ; X64-NEXT: retq # encoding: [0xc3]
3634 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
3638 declare <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float>, <8 x i32>, i8)
3640 define <8 x i32>@test_int_x86_avx512_cvt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1) {
3641 ; CHECK-LABEL: test_int_x86_avx512_cvt_ps2dq_256:
3643 ; CHECK-NEXT: vcvtps2dq %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5b,0xc0]
3644 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3645 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
3649 define <8 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
3650 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_256:
3652 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3653 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3654 ; X86-NEXT: vcvtps2dq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x5b,0xc8]
3655 ; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
3656 ; X86-NEXT: retl # encoding: [0xc3]
3658 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_256:
3660 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3661 ; X64-NEXT: vcvtps2dq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x5b,0xc8]
3662 ; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
3663 ; X64-NEXT: retq # encoding: [0xc3]
3664 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
3668 declare <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float>, <4 x i32>, i8)
3670 define <4 x i32>@test_int_x86_avx512_cvt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1) {
3671 ; CHECK-LABEL: test_int_x86_avx512_cvt_ps2udq_128:
3673 ; CHECK-NEXT: vcvtps2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x08,0x79,0xc0]
3674 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3675 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
3679 define <4 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
3680 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_128:
3682 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3683 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3684 ; X86-NEXT: vcvtps2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x79,0xc8]
3685 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3686 ; X86-NEXT: retl # encoding: [0xc3]
3688 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_128:
3690 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3691 ; X64-NEXT: vcvtps2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x79,0xc8]
3692 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3693 ; X64-NEXT: retq # encoding: [0xc3]
3694 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
3698 declare <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float>, <8 x i32>, i8)
3700 define <8 x i32>@test_int_x86_avx512_cvt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1) {
3701 ; CHECK-LABEL: test_int_x86_avx512_cvt_ps2udq_256:
3703 ; CHECK-NEXT: vcvtps2udq %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x28,0x79,0xc0]
3704 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3705 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
3709 define <8 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
3710 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_256:
3712 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3713 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3714 ; X86-NEXT: vcvtps2udq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x79,0xc8]
3715 ; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
3716 ; X86-NEXT: retl # encoding: [0xc3]
3718 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_256:
3720 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3721 ; X64-NEXT: vcvtps2udq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x79,0xc8]
3722 ; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
3723 ; X64-NEXT: retq # encoding: [0xc3]
3724 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
3728 declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double>, <4 x i32>, i8)
3730 define <4 x i32>@test_int_x86_avx512_ask_cvtt_pd2dq_128(<2 x double> %x0, <4 x i32> %x1) {
3731 ; CHECK-LABEL: test_int_x86_avx512_ask_cvtt_pd2dq_128:
3733 ; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
3734 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3735 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
3739 define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3740 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_128:
3742 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3743 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3744 ; X86-NEXT: vcvttpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe6,0xc8]
3745 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3746 ; X86-NEXT: retl # encoding: [0xc3]
3748 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_128:
3750 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3751 ; X64-NEXT: vcvttpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe6,0xc8]
3752 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3753 ; X64-NEXT: retq # encoding: [0xc3]
3754 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
3758 define <4 x i32>@test_int_x86_avx512_cvtt_pd2dq_128_zext(<2 x double> %x0, <4 x i32> %x1) {
3759 ; CHECK-LABEL: test_int_x86_avx512_cvtt_pd2dq_128_zext:
3761 ; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
3762 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3763 %res2 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
3764 %res3 = shufflevector <4 x i32> %res2, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3768 define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_128_zext(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3769 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_128_zext:
3771 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3772 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3773 ; X86-NEXT: vcvttpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe6,0xc8]
3774 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3775 ; X86-NEXT: retl # encoding: [0xc3]
3777 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_128_zext:
3779 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3780 ; X64-NEXT: vcvttpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe6,0xc8]
3781 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3782 ; X64-NEXT: retq # encoding: [0xc3]
3783 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
3784 %res1 = shufflevector <4 x i32> %res, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3788 declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double>, <4 x i32>, i8)
3790 define <4 x i32>@test_int_x86_avx512_cvtt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1) {
3791 ; CHECK-LABEL: test_int_x86_avx512_cvtt_pd2udq_128:
3793 ; CHECK-NEXT: vcvttpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x78,0xc0]
3794 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3795 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
3799 define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3800 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_128:
3802 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3803 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3804 ; X86-NEXT: vcvttpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x78,0xc8]
3805 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3806 ; X86-NEXT: retl # encoding: [0xc3]
3808 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_128:
3810 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3811 ; X64-NEXT: vcvttpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x78,0xc8]
3812 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3813 ; X64-NEXT: retq # encoding: [0xc3]
3814 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
3818 define <4 x i32>@test_int_x86_avx512_cvtt_pd2udq_128_zext(<2 x double> %x0, <4 x i32> %x1) {
3819 ; CHECK-LABEL: test_int_x86_avx512_cvtt_pd2udq_128_zext:
3821 ; CHECK-NEXT: vcvttpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x78,0xc0]
3822 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3823 %res2 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
3824 %res3 = shufflevector <4 x i32> %res2, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3828 define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_128_zext(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
3829 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_128_zext:
3831 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3832 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3833 ; X86-NEXT: vcvttpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x78,0xc8]
3834 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3835 ; X86-NEXT: retl # encoding: [0xc3]
3837 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_128_zext:
3839 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3840 ; X64-NEXT: vcvttpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x78,0xc8]
3841 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3842 ; X64-NEXT: retq # encoding: [0xc3]
3843 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
3844 %res1 = shufflevector <4 x i32> %res, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
3848 declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double>, <4 x i32>, i8)
3850 define <4 x i32>@test_int_x86_avx512_cvtt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1) {
3851 ; CHECK-LABEL: test_int_x86_avx512_cvtt_pd2udq_256:
3853 ; CHECK-NEXT: vcvttpd2udq %ymm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x28,0x78,0xc0]
3854 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3855 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3856 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
3860 define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
3861 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_256:
3863 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3864 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3865 ; X86-NEXT: vcvttpd2udq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x78,0xc8]
3866 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3867 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3868 ; X86-NEXT: retl # encoding: [0xc3]
3870 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_256:
3872 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3873 ; X64-NEXT: vcvttpd2udq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x78,0xc8]
3874 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3875 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
3876 ; X64-NEXT: retq # encoding: [0xc3]
3877 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
3881 declare <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float>, <4 x i32>, i8)
3883 define <4 x i32>@test_int_x86_avx512_cvtt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1) {
3884 ; CHECK-LABEL: test_int_x86_avx512_cvtt_ps2udq_128:
3886 ; CHECK-NEXT: vcvttps2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x08,0x78,0xc0]
3887 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3888 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
3892 define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
3893 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_128:
3895 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3896 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3897 ; X86-NEXT: vcvttps2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x78,0xc8]
3898 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3899 ; X86-NEXT: retl # encoding: [0xc3]
3901 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_128:
3903 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3904 ; X64-NEXT: vcvttps2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x78,0xc8]
3905 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
3906 ; X64-NEXT: retq # encoding: [0xc3]
3907 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
3911 declare <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float>, <8 x i32>, i8)
3913 define <8 x i32>@test_int_x86_avx512_cvtt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1) {
3914 ; CHECK-LABEL: test_int_x86_avx512_cvtt_ps2udq_256:
3916 ; CHECK-NEXT: vcvttps2udq %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x28,0x78,0xc0]
3917 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3918 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
3922 define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
3923 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_256:
3925 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3926 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3927 ; X86-NEXT: vcvttps2udq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x78,0xc8]
3928 ; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
3929 ; X86-NEXT: retl # encoding: [0xc3]
3931 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_256:
3933 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3934 ; X64-NEXT: vcvttps2udq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x78,0xc8]
3935 ; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
3936 ; X64-NEXT: retq # encoding: [0xc3]
3937 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
3941 declare <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double>, i32, <2 x double>, i8)
3943 define <2 x double>@test_int_x86_avx512_mask_rndscale_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
3944 ; X86-LABEL: test_int_x86_avx512_mask_rndscale_pd_128:
3946 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3947 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3948 ; X86-NEXT: vrndscalepd $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x09,0xc8,0x04]
3949 ; X86-NEXT: vrndscalepd $88, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x09,0xc0,0x58]
3950 ; X86-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
3951 ; X86-NEXT: retl # encoding: [0xc3]
3953 ; X64-LABEL: test_int_x86_avx512_mask_rndscale_pd_128:
3955 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3956 ; X64-NEXT: vrndscalepd $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x09,0xc8,0x04]
3957 ; X64-NEXT: vrndscalepd $88, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x09,0xc0,0x58]
3958 ; X64-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
3959 ; X64-NEXT: retq # encoding: [0xc3]
3960 %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3)
3961 %res1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 88, <2 x double> %x2, i8 -1)
3962 %res2 = fadd <2 x double> %res, %res1
3963 ret <2 x double> %res2
3966 declare <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double>, i32, <4 x double>, i8)
3968 define <4 x double>@test_int_x86_avx512_mask_rndscale_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
3969 ; X86-LABEL: test_int_x86_avx512_mask_rndscale_pd_256:
3971 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3972 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3973 ; X86-NEXT: vrndscalepd $4, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x09,0xc8,0x04]
3974 ; X86-NEXT: vrndscalepd $88, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x09,0xc0,0x58]
3975 ; X86-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
3976 ; X86-NEXT: retl # encoding: [0xc3]
3978 ; X64-LABEL: test_int_x86_avx512_mask_rndscale_pd_256:
3980 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3981 ; X64-NEXT: vrndscalepd $4, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x09,0xc8,0x04]
3982 ; X64-NEXT: vrndscalepd $88, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x09,0xc0,0x58]
3983 ; X64-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
3984 ; X64-NEXT: retq # encoding: [0xc3]
3985 %res = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3)
3986 %res1 = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 88, <4 x double> %x2, i8 -1)
3987 %res2 = fadd <4 x double> %res, %res1
3988 ret <4 x double> %res2
3991 declare <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float>, i32, <4 x float>, i8)
3993 define <4 x float>@test_int_x86_avx512_mask_rndscale_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
3994 ; X86-LABEL: test_int_x86_avx512_mask_rndscale_ps_128:
3996 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3997 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3998 ; X86-NEXT: vrndscaleps $88, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x08,0xc8,0x58]
3999 ; X86-NEXT: vroundps $4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x08,0xc0,0x04]
4000 ; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
4001 ; X86-NEXT: retl # encoding: [0xc3]
4003 ; X64-LABEL: test_int_x86_avx512_mask_rndscale_ps_128:
4005 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4006 ; X64-NEXT: vrndscaleps $88, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x08,0xc8,0x58]
4007 ; X64-NEXT: vroundps $4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x08,0xc0,0x04]
4008 ; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
4009 ; X64-NEXT: retq # encoding: [0xc3]
4010 %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 %x3)
4011 %res1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 -1)
4012 %res2 = fadd <4 x float> %res, %res1
4013 ret <4 x float> %res2
4016 declare <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float>, i32, <8 x float>, i8)
4018 define <8 x float>@test_int_x86_avx512_mask_rndscale_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
4019 ; X86-LABEL: test_int_x86_avx512_mask_rndscale_ps_256:
4021 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4022 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4023 ; X86-NEXT: vrndscaleps $5, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x08,0xc8,0x05]
4024 ; X86-NEXT: vrndscaleps $66, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x08,0xc0,0x42]
4025 ; X86-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
4026 ; X86-NEXT: retl # encoding: [0xc3]
4028 ; X64-LABEL: test_int_x86_avx512_mask_rndscale_ps_256:
4030 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4031 ; X64-NEXT: vrndscaleps $5, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x08,0xc8,0x05]
4032 ; X64-NEXT: vrndscaleps $66, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x08,0xc0,0x42]
4033 ; X64-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
4034 ; X64-NEXT: retq # encoding: [0xc3]
4035 %res = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 5, <8 x float> %x2, i8 %x3)
4036 %res1 = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 66, <8 x float> %x2, i8 -1)
4037 %res2 = fadd <8 x float> %res, %res1
4038 ret <8 x float> %res2
4041 declare <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double>, i32, <2 x double>, i8)
4043 define <2 x double>@test_int_x86_avx512_mask_getmant_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
4044 ; X86-LABEL: test_int_x86_avx512_mask_getmant_pd_128:
4046 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4047 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4048 ; X86-NEXT: vgetmantpd $11, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x26,0xc8,0x0b]
4049 ; X86-NEXT: vgetmantpd $12, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x26,0xd0,0x0c]
4050 ; X86-NEXT: vgetmantpd $13, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x26,0xc0,0x0d]
4051 ; X86-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
4052 ; X86-NEXT: vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
4053 ; X86-NEXT: retl # encoding: [0xc3]
4055 ; X64-LABEL: test_int_x86_avx512_mask_getmant_pd_128:
4057 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4058 ; X64-NEXT: vgetmantpd $11, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x26,0xc8,0x0b]
4059 ; X64-NEXT: vgetmantpd $12, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x26,0xd0,0x0c]
4060 ; X64-NEXT: vgetmantpd $13, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x26,0xc0,0x0d]
4061 ; X64-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
4062 ; X64-NEXT: vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
4063 ; X64-NEXT: retq # encoding: [0xc3]
4064 %res = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> %x2, i8 %x3)
4065 %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 12, <2 x double> zeroinitializer, i8 %x3)
4066 %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 13, <2 x double> %x2, i8 -1)
4067 %res3 = fadd <2 x double> %res, %res1
4068 %res4 = fadd <2 x double> %res2, %res3
4069 ret <2 x double> %res4
4072 declare <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double>, i32, <4 x double>, i8)
4074 define <4 x double>@test_int_x86_avx512_mask_getmant_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
4075 ; X86-LABEL: test_int_x86_avx512_mask_getmant_pd_256:
4077 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4078 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4079 ; X86-NEXT: vgetmantpd $11, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x26,0xc8,0x0b]
4080 ; X86-NEXT: vgetmantpd $12, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x26,0xc0,0x0c]
4081 ; X86-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
4082 ; X86-NEXT: retl # encoding: [0xc3]
4084 ; X64-LABEL: test_int_x86_avx512_mask_getmant_pd_256:
4086 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4087 ; X64-NEXT: vgetmantpd $11, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x26,0xc8,0x0b]
4088 ; X64-NEXT: vgetmantpd $12, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x26,0xc0,0x0c]
4089 ; X64-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
4090 ; X64-NEXT: retq # encoding: [0xc3]
4091 %res = call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %x0, i32 11, <4 x double> %x2, i8 %x3)
4092 %res1 = call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %x0, i32 12, <4 x double> %x2, i8 -1)
4093 %res2 = fadd <4 x double> %res, %res1
4094 ret <4 x double> %res2
4097 declare <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float>, i32, <4 x float>, i8)
4099 define <4 x float>@test_int_x86_avx512_mask_getmant_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
4100 ; X86-LABEL: test_int_x86_avx512_mask_getmant_ps_128:
4102 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4103 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4104 ; X86-NEXT: vgetmantps $11, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x26,0xc8,0x0b]
4105 ; X86-NEXT: vgetmantps $12, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x26,0xc0,0x0c]
4106 ; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
4107 ; X86-NEXT: retl # encoding: [0xc3]
4109 ; X64-LABEL: test_int_x86_avx512_mask_getmant_ps_128:
4111 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4112 ; X64-NEXT: vgetmantps $11, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x26,0xc8,0x0b]
4113 ; X64-NEXT: vgetmantps $12, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x26,0xc0,0x0c]
4114 ; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
4115 ; X64-NEXT: retq # encoding: [0xc3]
4116 %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float> %x0, i32 11, <4 x float> %x2, i8 %x3)
4117 %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float> %x0, i32 12, <4 x float> %x2, i8 -1)
4118 %res2 = fadd <4 x float> %res, %res1
4119 ret <4 x float> %res2
4122 declare <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float>, i32, <8 x float>, i8)
4124 define <8 x float>@test_int_x86_avx512_mask_getmant_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
4125 ; X86-LABEL: test_int_x86_avx512_mask_getmant_ps_256:
4127 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4128 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4129 ; X86-NEXT: vgetmantps $11, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x26,0xc8,0x0b]
4130 ; X86-NEXT: vgetmantps $12, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x26,0xc0,0x0c]
4131 ; X86-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
4132 ; X86-NEXT: retl # encoding: [0xc3]
4134 ; X64-LABEL: test_int_x86_avx512_mask_getmant_ps_256:
4136 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4137 ; X64-NEXT: vgetmantps $11, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x26,0xc8,0x0b]
4138 ; X64-NEXT: vgetmantps $12, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x26,0xc0,0x0c]
4139 ; X64-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
4140 ; X64-NEXT: retq # encoding: [0xc3]
4141 %res = call <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 %x3)
4142 %res1 = call <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float> %x0, i32 12, <8 x float> %x2, i8 -1)
4143 %res2 = fadd <8 x float> %res, %res1
4144 ret <8 x float> %res2
4147 declare <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32)
4149 define <4 x i32>@test_int_x86_avx512_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
4150 ; CHECK-LABEL: test_int_x86_avx512_pternlog_d_128:
4152 ; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0x75,0x08,0x25,0xc2,0x21]
4153 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4154 %1 = call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33)
4158 define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
4159 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
4161 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4162 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4163 ; X86-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21]
4164 ; X86-NEXT: retl # encoding: [0xc3]
4166 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
4168 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4169 ; X64-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21]
4170 ; X64-NEXT: retq # encoding: [0xc3]
4171 %1 = call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33)
4172 %2 = bitcast i8 %x4 to <8 x i1>
4173 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4174 %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0
4178 declare <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
4180 define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
4181 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
4183 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4184 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4185 ; X86-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21]
4186 ; X86-NEXT: retl # encoding: [0xc3]
4188 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
4190 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4191 ; X64-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21]
4192 ; X64-NEXT: retq # encoding: [0xc3]
4193 %1 = call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33)
4194 %2 = bitcast i8 %x4 to <8 x i1>
4195 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4196 %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> zeroinitializer
4200 declare <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32)
4202 define <8 x i32>@test_int_x86_avx512_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
4203 ; CHECK-LABEL: test_int_x86_avx512_pternlog_d_256:
4205 ; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0x75,0x28,0x25,0xc2,0x21]
4206 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4207 %1 = call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33)
4211 define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
4212 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
4214 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4215 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4216 ; X86-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21]
4217 ; X86-NEXT: retl # encoding: [0xc3]
4219 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
4221 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4222 ; X64-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21]
4223 ; X64-NEXT: retq # encoding: [0xc3]
4224 %1 = call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33)
4225 %2 = bitcast i8 %x4 to <8 x i1>
4226 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0
4230 declare <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)
4232 define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
4233 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
4235 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4236 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4237 ; X86-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21]
4238 ; X86-NEXT: retl # encoding: [0xc3]
4240 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
4242 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4243 ; X64-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21]
4244 ; X64-NEXT: retq # encoding: [0xc3]
4245 %1 = call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33)
4246 %2 = bitcast i8 %x4 to <8 x i1>
4247 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer
4251 declare <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32)
4253 define <2 x i64>@test_int_x86_avx512_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
4254 ; CHECK-LABEL: test_int_x86_avx512_pternlog_q_128:
4256 ; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xc2,0x21]
4257 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4258 %1 = call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33)
4262 define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
4263 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
4265 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4266 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4267 ; X86-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21]
4268 ; X86-NEXT: retl # encoding: [0xc3]
4270 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
4272 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4273 ; X64-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21]
4274 ; X64-NEXT: retq # encoding: [0xc3]
4275 %1 = call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33)
4276 %2 = bitcast i8 %x4 to <8 x i1>
4277 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
4278 %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x0
4282 define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
4283 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
4285 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4286 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4287 ; X86-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21]
4288 ; X86-NEXT: retl # encoding: [0xc3]
4290 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
4292 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4293 ; X64-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21]
4294 ; X64-NEXT: retq # encoding: [0xc3]
4295 %1 = call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33)
4296 %2 = bitcast i8 %x4 to <8 x i1>
4297 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
4298 %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> zeroinitializer
4302 declare <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32)
4304 define <4 x i64>@test_int_x86_avx512_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
4305 ; CHECK-LABEL: test_int_x86_avx512_pternlog_q_256:
4307 ; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xc2,0x21]
4308 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4309 %1 = call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33)
4313 define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
4314 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
4316 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4317 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4318 ; X86-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21]
4319 ; X86-NEXT: retl # encoding: [0xc3]
4321 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
4323 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4324 ; X64-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21]
4325 ; X64-NEXT: retq # encoding: [0xc3]
4326 %1 = call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33)
4327 %2 = bitcast i8 %x4 to <8 x i1>
4328 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4329 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x0
4333 define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
4334 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
4336 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4337 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4338 ; X86-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21]
4339 ; X86-NEXT: retl # encoding: [0xc3]
4341 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
4343 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4344 ; X64-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21]
4345 ; X64-NEXT: retq # encoding: [0xc3]
4346 %1 = call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33)
4347 %2 = bitcast i8 %x4 to <8 x i1>
4348 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4349 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> zeroinitializer
4353 define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0, i8 %mask, <8 x i16> %src) {
4354 ; X86-LABEL: test_x86_vcvtps2ph_128:
4356 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4357 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4358 ; X86-NEXT: vcvtps2ph $2, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc2,0x02]
4359 ; X86-NEXT: vcvtps2ph $10, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x1d,0xc3,0x0a]
4360 ; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xfd,0xd3]
4361 ; X86-NEXT: vcvtps2ph $11, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1d,0xc1,0x0b]
4362 ; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc2]
4363 ; X86-NEXT: retl # encoding: [0xc3]
4365 ; X64-LABEL: test_x86_vcvtps2ph_128:
4367 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4368 ; X64-NEXT: vcvtps2ph $2, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc2,0x02]
4369 ; X64-NEXT: vcvtps2ph $10, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x1d,0xc3,0x0a]
4370 ; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xfd,0xd3]
4371 ; X64-NEXT: vcvtps2ph $11, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1d,0xc1,0x0b]
4372 ; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc2]
4373 ; X64-NEXT: retq # encoding: [0xc3]
4374 %res1 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)
4375 %res2 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 10, <8 x i16> zeroinitializer, i8 %mask)
4376 %res3 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 11, <8 x i16> %src, i8 %mask)
4377 %res0 = add <8 x i16> %res1, %res2
4378 %res = add <8 x i16> %res3, %res0
4382 declare <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float>, i32, <8 x i16>, i8) nounwind readonly
4384 define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0, i8 %mask, <8 x i16> %src) {
4385 ; X86-LABEL: test_x86_vcvtps2ph_256:
4387 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4388 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4389 ; X86-NEXT: vcvtps2ph $2, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc2,0x02]
4390 ; X86-NEXT: vcvtps2ph $11, %ymm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x1d,0xc3,0x0b]
4391 ; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xfd,0xd3]
4392 ; X86-NEXT: vcvtps2ph $12, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1d,0xc1,0x0c]
4393 ; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc2]
4394 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
4395 ; X86-NEXT: retl # encoding: [0xc3]
4397 ; X64-LABEL: test_x86_vcvtps2ph_256:
4399 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4400 ; X64-NEXT: vcvtps2ph $2, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc2,0x02]
4401 ; X64-NEXT: vcvtps2ph $11, %ymm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x1d,0xc3,0x0b]
4402 ; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xfd,0xd3]
4403 ; X64-NEXT: vcvtps2ph $12, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1d,0xc1,0x0c]
4404 ; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc2]
4405 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
4406 ; X64-NEXT: retq # encoding: [0xc3]
4407 %res1 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)
4408 %res2 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 11, <8 x i16> zeroinitializer, i8 %mask)
4409 %res3 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 12, <8 x i16> %src, i8 %mask)
4410 %res0 = add <8 x i16> %res1, %res2
4411 %res = add <8 x i16> %res3, %res0
4415 declare <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float>, i32, <8 x i16>, i8) nounwind readonly
4417 define <8 x float> @test_rsqrt_ps_256_rr(<8 x float> %a0) {
4418 ; CHECK-LABEL: test_rsqrt_ps_256_rr:
4420 ; CHECK-NEXT: vrsqrt14ps %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x4e,0xc0]
4421 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4422 %res = call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1)
4423 ret <8 x float> %res
4426 define <8 x float> @test_rsqrt_ps_256_rrkz(<8 x float> %a0, i8 %mask) {
4427 ; X86-LABEL: test_rsqrt_ps_256_rrkz:
4429 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4430 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4431 ; X86-NEXT: vrsqrt14ps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x4e,0xc0]
4432 ; X86-NEXT: retl # encoding: [0xc3]
4434 ; X64-LABEL: test_rsqrt_ps_256_rrkz:
4436 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4437 ; X64-NEXT: vrsqrt14ps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x4e,0xc0]
4438 ; X64-NEXT: retq # encoding: [0xc3]
4439 %res = call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
4440 ret <8 x float> %res
4443 define <8 x float> @test_rsqrt_ps_256_rrk(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
4444 ; X86-LABEL: test_rsqrt_ps_256_rrk:
4446 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4447 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4448 ; X86-NEXT: vrsqrt14ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x4e,0xc8]
4449 ; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
4450 ; X86-NEXT: retl # encoding: [0xc3]
4452 ; X64-LABEL: test_rsqrt_ps_256_rrk:
4454 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4455 ; X64-NEXT: vrsqrt14ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x4e,0xc8]
4456 ; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
4457 ; X64-NEXT: retq # encoding: [0xc3]
4458 %res = call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %a0, <8 x float> %a1, i8 %mask)
4459 ret <8 x float> %res
4462 define <4 x float> @test_rsqrt_ps_128_rr(<4 x float> %a0) {
4463 ; CHECK-LABEL: test_rsqrt_ps_128_rr:
4465 ; CHECK-NEXT: vrsqrt14ps %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x4e,0xc0]
4466 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4467 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1)
4468 ret <4 x float> %res
4471 define <4 x float> @test_rsqrt_ps_128_rrkz(<4 x float> %a0, i8 %mask) {
4472 ; X86-LABEL: test_rsqrt_ps_128_rrkz:
4474 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4475 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4476 ; X86-NEXT: vrsqrt14ps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x4e,0xc0]
4477 ; X86-NEXT: retl # encoding: [0xc3]
4479 ; X64-LABEL: test_rsqrt_ps_128_rrkz:
4481 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4482 ; X64-NEXT: vrsqrt14ps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x4e,0xc0]
4483 ; X64-NEXT: retq # encoding: [0xc3]
4484 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask)
4485 ret <4 x float> %res
4488 define <4 x float> @test_rsqrt_ps_128_rrk(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
4489 ; X86-LABEL: test_rsqrt_ps_128_rrk:
4491 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4492 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4493 ; X86-NEXT: vrsqrt14ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x4e,0xc8]
4494 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
4495 ; X86-NEXT: retl # encoding: [0xc3]
4497 ; X64-LABEL: test_rsqrt_ps_128_rrk:
4499 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4500 ; X64-NEXT: vrsqrt14ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x4e,0xc8]
4501 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
4502 ; X64-NEXT: retq # encoding: [0xc3]
4503 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask)
4504 ret <4 x float> %res
4507 declare <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
4508 declare <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float>, <4 x float>, i8) nounwind readnone
4510 define <8 x float> @test_rcp_ps_256_rr(<8 x float> %a0) {
4511 ; CHECK-LABEL: test_rcp_ps_256_rr:
4513 ; CHECK-NEXT: vrcp14ps %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x4c,0xc0]
4514 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4515 %res = call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1)
4516 ret <8 x float> %res
4519 define <8 x float> @test_rcp_ps_256_rrkz(<8 x float> %a0, i8 %mask) {
4520 ; X86-LABEL: test_rcp_ps_256_rrkz:
4522 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4523 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4524 ; X86-NEXT: vrcp14ps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x4c,0xc0]
4525 ; X86-NEXT: retl # encoding: [0xc3]
4527 ; X64-LABEL: test_rcp_ps_256_rrkz:
4529 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4530 ; X64-NEXT: vrcp14ps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x4c,0xc0]
4531 ; X64-NEXT: retq # encoding: [0xc3]
4532 %res = call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
4533 ret <8 x float> %res
4536 define <8 x float> @test_rcp_ps_256_rrk(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
4537 ; X86-LABEL: test_rcp_ps_256_rrk:
4539 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4540 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4541 ; X86-NEXT: vrcp14ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x4c,0xc8]
4542 ; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
4543 ; X86-NEXT: retl # encoding: [0xc3]
4545 ; X64-LABEL: test_rcp_ps_256_rrk:
4547 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4548 ; X64-NEXT: vrcp14ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x4c,0xc8]
4549 ; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
4550 ; X64-NEXT: retq # encoding: [0xc3]
4551 %res = call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %a0, <8 x float> %a1, i8 %mask)
4552 ret <8 x float> %res
4555 define <4 x float> @test_rcp_ps_128_rr(<4 x float> %a0) {
4556 ; CHECK-LABEL: test_rcp_ps_128_rr:
4558 ; CHECK-NEXT: vrcp14ps %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x4c,0xc0]
4559 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4560 %res = call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1)
4561 ret <4 x float> %res
4564 define <4 x float> @test_rcp_ps_128_rrkz(<4 x float> %a0, i8 %mask) {
4565 ; X86-LABEL: test_rcp_ps_128_rrkz:
4567 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4568 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4569 ; X86-NEXT: vrcp14ps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x4c,0xc0]
4570 ; X86-NEXT: retl # encoding: [0xc3]
4572 ; X64-LABEL: test_rcp_ps_128_rrkz:
4574 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4575 ; X64-NEXT: vrcp14ps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x4c,0xc0]
4576 ; X64-NEXT: retq # encoding: [0xc3]
4577 %res = call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask)
4578 ret <4 x float> %res
4581 define <4 x float> @test_rcp_ps_128_rrk(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
4582 ; X86-LABEL: test_rcp_ps_128_rrk:
4584 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4585 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4586 ; X86-NEXT: vrcp14ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x4c,0xc8]
4587 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
4588 ; X86-NEXT: retl # encoding: [0xc3]
4590 ; X64-LABEL: test_rcp_ps_128_rrk:
4592 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4593 ; X64-NEXT: vrcp14ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x4c,0xc8]
4594 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
4595 ; X64-NEXT: retq # encoding: [0xc3]
4596 %res = call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask)
4597 ret <4 x float> %res
4600 declare <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
4601 declare <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float>, <4 x float>, i8) nounwind readnone
4603 define <4 x double> @test_rsqrt_pd_256_rr(<4 x double> %a0) {
4604 ; CHECK-LABEL: test_rsqrt_pd_256_rr:
4606 ; CHECK-NEXT: vrsqrt14pd %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x4e,0xc0]
4607 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4608 %res = call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 -1)
4609 ret <4 x double> %res
4612 define <4 x double> @test_rsqrt_pd_256_rrkz(<4 x double> %a0, i8 %mask) {
4613 ; X86-LABEL: test_rsqrt_pd_256_rrkz:
4615 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4616 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4617 ; X86-NEXT: vrsqrt14pd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x4e,0xc0]
4618 ; X86-NEXT: retl # encoding: [0xc3]
4620 ; X64-LABEL: test_rsqrt_pd_256_rrkz:
4622 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4623 ; X64-NEXT: vrsqrt14pd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x4e,0xc0]
4624 ; X64-NEXT: retq # encoding: [0xc3]
4625 %res = call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
4626 ret <4 x double> %res
4629 define <4 x double> @test_rsqrt_pd_256_rrk(<4 x double> %a0, <4 x double> %a1, i8 %mask) {
4630 ; X86-LABEL: test_rsqrt_pd_256_rrk:
4632 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4633 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4634 ; X86-NEXT: vrsqrt14pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x4e,0xc8]
4635 ; X86-NEXT: vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
4636 ; X86-NEXT: retl # encoding: [0xc3]
4638 ; X64-LABEL: test_rsqrt_pd_256_rrk:
4640 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4641 ; X64-NEXT: vrsqrt14pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x4e,0xc8]
4642 ; X64-NEXT: vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
4643 ; X64-NEXT: retq # encoding: [0xc3]
4644 %res = call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %mask)
4645 ret <4 x double> %res
4648 define <2 x double> @test_rsqrt_pd_128_rr(<2 x double> %a0) {
4649 ; CHECK-LABEL: test_rsqrt_pd_128_rr:
4651 ; CHECK-NEXT: vrsqrt14pd %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x4e,0xc0]
4652 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4653 %res = call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 -1)
4654 ret <2 x double> %res
4657 define <2 x double> @test_rsqrt_pd_128_rrkz(<2 x double> %a0, i8 %mask) {
4658 ; X86-LABEL: test_rsqrt_pd_128_rrkz:
4660 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4661 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4662 ; X86-NEXT: vrsqrt14pd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x4e,0xc0]
4663 ; X86-NEXT: retl # encoding: [0xc3]
4665 ; X64-LABEL: test_rsqrt_pd_128_rrkz:
4667 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4668 ; X64-NEXT: vrsqrt14pd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x4e,0xc0]
4669 ; X64-NEXT: retq # encoding: [0xc3]
4670 %res = call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 %mask)
4671 ret <2 x double> %res
4674 define <2 x double> @test_rsqrt_pd_128_rrk(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
4675 ; X86-LABEL: test_rsqrt_pd_128_rrk:
4677 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4678 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4679 ; X86-NEXT: vrsqrt14pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x4e,0xc8]
4680 ; X86-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
4681 ; X86-NEXT: retl # encoding: [0xc3]
4683 ; X64-LABEL: test_rsqrt_pd_128_rrk:
4685 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4686 ; X64-NEXT: vrsqrt14pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x4e,0xc8]
4687 ; X64-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
4688 ; X64-NEXT: retq # encoding: [0xc3]
4689 %res = call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %a0, <2 x double> %a1, i8 %mask)
4690 ret <2 x double> %res
4693 declare <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
4694 declare <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double>, <2 x double>, i8) nounwind readnone
4696 define <4 x double> @test_rcp_pd_256_rr(<4 x double> %a0) {
4697 ; CHECK-LABEL: test_rcp_pd_256_rr:
4699 ; CHECK-NEXT: vrcp14pd %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x4c,0xc0]
4700 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4701 %res = call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 -1)
4702 ret <4 x double> %res
4705 define <4 x double> @test_rcp_pd_256_rrkz(<4 x double> %a0, i8 %mask) {
4706 ; X86-LABEL: test_rcp_pd_256_rrkz:
4708 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4709 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4710 ; X86-NEXT: vrcp14pd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x4c,0xc0]
4711 ; X86-NEXT: retl # encoding: [0xc3]
4713 ; X64-LABEL: test_rcp_pd_256_rrkz:
4715 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4716 ; X64-NEXT: vrcp14pd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x4c,0xc0]
4717 ; X64-NEXT: retq # encoding: [0xc3]
4718 %res = call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
4719 ret <4 x double> %res
4722 define <4 x double> @test_rcp_pd_256_rrk(<4 x double> %a0, <4 x double> %a1, i8 %mask) {
4723 ; X86-LABEL: test_rcp_pd_256_rrk:
4725 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4726 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4727 ; X86-NEXT: vrcp14pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x4c,0xc8]
4728 ; X86-NEXT: vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
4729 ; X86-NEXT: retl # encoding: [0xc3]
4731 ; X64-LABEL: test_rcp_pd_256_rrk:
4733 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4734 ; X64-NEXT: vrcp14pd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x4c,0xc8]
4735 ; X64-NEXT: vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
4736 ; X64-NEXT: retq # encoding: [0xc3]
4737 %res = call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %mask)
4738 ret <4 x double> %res
4741 define <2 x double> @test_rcp_pd_128_rr(<2 x double> %a0) {
4742 ; CHECK-LABEL: test_rcp_pd_128_rr:
4744 ; CHECK-NEXT: vrcp14pd %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x4c,0xc0]
4745 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4746 %res = call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 -1)
4747 ret <2 x double> %res
4750 define <2 x double> @test_rcp_pd_128_rrkz(<2 x double> %a0, i8 %mask) {
4751 ; X86-LABEL: test_rcp_pd_128_rrkz:
4753 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4754 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4755 ; X86-NEXT: vrcp14pd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x4c,0xc0]
4756 ; X86-NEXT: retl # encoding: [0xc3]
4758 ; X64-LABEL: test_rcp_pd_128_rrkz:
4760 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4761 ; X64-NEXT: vrcp14pd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x4c,0xc0]
4762 ; X64-NEXT: retq # encoding: [0xc3]
4763 %res = call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 %mask)
4764 ret <2 x double> %res
4767 define <2 x double> @test_rcp_pd_128_rrk(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
4768 ; X86-LABEL: test_rcp_pd_128_rrk:
4770 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4771 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4772 ; X86-NEXT: vrcp14pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x4c,0xc8]
4773 ; X86-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
4774 ; X86-NEXT: retl # encoding: [0xc3]
4776 ; X64-LABEL: test_rcp_pd_128_rrk:
4778 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4779 ; X64-NEXT: vrcp14pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x4c,0xc8]
4780 ; X64-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
4781 ; X64-NEXT: retq # encoding: [0xc3]
4782 %res = call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %a0, <2 x double> %a1, i8 %mask)
4783 ret <2 x double> %res
4786 declare <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
4787 declare <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double>, <2 x double>, i8) nounwind readnone
4789 declare <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double>, <4 x i64>)
4791 define <4 x double>@test_int_x86_avx512_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2) {
4792 ; CHECK-LABEL: test_int_x86_avx512_permvar_df_256:
4794 ; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x16,0xc0]
4795 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4796 %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %x0, <4 x i64> %x1)
4800 define <4 x double>@test_int_x86_avx512_mask_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
4801 ; X86-LABEL: test_int_x86_avx512_mask_permvar_df_256:
4803 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4804 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4805 ; X86-NEXT: vpermpd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0]
4806 ; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
4807 ; X86-NEXT: retl # encoding: [0xc3]
4809 ; X64-LABEL: test_int_x86_avx512_mask_permvar_df_256:
4811 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4812 ; X64-NEXT: vpermpd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0]
4813 ; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
4814 ; X64-NEXT: retq # encoding: [0xc3]
4815 %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %x0, <4 x i64> %x1)
4816 %2 = bitcast i8 %x3 to <8 x i1>
4817 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4818 %3 = select <4 x i1> %extract1, <4 x double> %1, <4 x double> %x2
4822 define <4 x double>@test_int_x86_avx512_maskz_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, i8 %x3) {
4823 ; X86-LABEL: test_int_x86_avx512_maskz_permvar_df_256:
4825 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4826 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4827 ; X86-NEXT: vpermpd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xc0]
4828 ; X86-NEXT: retl # encoding: [0xc3]
4830 ; X64-LABEL: test_int_x86_avx512_maskz_permvar_df_256:
4832 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4833 ; X64-NEXT: vpermpd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xc0]
4834 ; X64-NEXT: retq # encoding: [0xc3]
4835 %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %x0, <4 x i64> %x1)
4836 %2 = bitcast i8 %x3 to <8 x i1>
4837 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4838 %3 = select <4 x i1> %extract1, <4 x double> %1, <4 x double> zeroinitializer
4842 declare <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64>, <4 x i64>)
4844 define <4 x i64>@test_int_x86_avx512_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
4845 ; CHECK-LABEL: test_int_x86_avx512_permvar_di_256:
4847 ; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x16,0xc0]
4848 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4849 %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1)
4853 define <4 x i64>@test_int_x86_avx512_mask_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
4854 ; X86-LABEL: test_int_x86_avx512_mask_permvar_di_256:
4856 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4857 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4858 ; X86-NEXT: vpermq %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0]
4859 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4860 ; X86-NEXT: retl # encoding: [0xc3]
4862 ; X64-LABEL: test_int_x86_avx512_mask_permvar_di_256:
4864 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4865 ; X64-NEXT: vpermq %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0]
4866 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4867 ; X64-NEXT: retq # encoding: [0xc3]
4868 %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1)
4869 %2 = bitcast i8 %x3 to <8 x i1>
4870 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4871 %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2
4875 define <4 x i64>@test_int_x86_avx512_maskz_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x3) {
4876 ; X86-LABEL: test_int_x86_avx512_maskz_permvar_di_256:
4878 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4879 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4880 ; X86-NEXT: vpermq %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xc0]
4881 ; X86-NEXT: retl # encoding: [0xc3]
4883 ; X64-LABEL: test_int_x86_avx512_maskz_permvar_di_256:
4885 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4886 ; X64-NEXT: vpermq %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xc0]
4887 ; X64-NEXT: retq # encoding: [0xc3]
4888 %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1)
4889 %2 = bitcast i8 %x3 to <8 x i1>
4890 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4891 %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> zeroinitializer
4895 declare <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double>, <2 x double>, <2 x i64>, i32, i8)
4897 define <2 x double>@test_int_x86_avx512_mask_fixupimm_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) {
4898 ; X86-LABEL: test_int_x86_avx512_mask_fixupimm_pd_128:
4900 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4901 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4902 ; X86-NEXT: vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
4903 ; X86-NEXT: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x54,0xda,0x05]
4904 ; X86-NEXT: vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4]
4905 ; X86-NEXT: vfixupimmpd $4, %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xe2,0x04]
4906 ; X86-NEXT: vaddpd %xmm4, %xmm3, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xdc]
4907 ; X86-NEXT: vfixupimmpd $3, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0xf5,0x08,0x54,0xc2,0x03]
4908 ; X86-NEXT: vaddpd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc0]
4909 ; X86-NEXT: retl # encoding: [0xc3]
4911 ; X64-LABEL: test_int_x86_avx512_mask_fixupimm_pd_128:
4913 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4914 ; X64-NEXT: vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
4915 ; X64-NEXT: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x54,0xda,0x05]
4916 ; X64-NEXT: vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4]
4917 ; X64-NEXT: vfixupimmpd $4, %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xe2,0x04]
4918 ; X64-NEXT: vaddpd %xmm4, %xmm3, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xdc]
4919 ; X64-NEXT: vfixupimmpd $3, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0xf5,0x08,0x54,0xc2,0x03]
4920 ; X64-NEXT: vaddpd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc0]
4921 ; X64-NEXT: retq # encoding: [0xc3]
4922 %res = call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1,<2 x i64> %x2, i32 5, i8 %x4)
4923 %res1 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> zeroinitializer, <2 x double> %x1, <2 x i64> %x2, i32 4, i8 %x4)
4924 %res2 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 3, i8 -1)
4925 %res3 = fadd <2 x double> %res, %res1
4926 %res4 = fadd <2 x double> %res3, %res2
4927 ret <2 x double> %res4
4930 declare <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double>, <2 x double>, <2 x i64>, i32, i8)
4932 define <2 x double>@test_int_x86_avx512_maskz_fixupimm_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) {
4933 ; X86-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_128:
4935 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4936 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4937 ; X86-NEXT: vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
4938 ; X86-NEXT: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xda,0x05]
4939 ; X86-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x57,0xd2]
4940 ; X86-NEXT: vfixupimmpd $3, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xc2,0x03]
4941 ; X86-NEXT: vaddpd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc0]
4942 ; X86-NEXT: retl # encoding: [0xc3]
4944 ; X64-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_128:
4946 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4947 ; X64-NEXT: vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
4948 ; X64-NEXT: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xda,0x05]
4949 ; X64-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x57,0xd2]
4950 ; X64-NEXT: vfixupimmpd $3, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x54,0xc2,0x03]
4951 ; X64-NEXT: vaddpd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc0]
4952 ; X64-NEXT: retq # encoding: [0xc3]
4953 %res = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4)
4954 %res1 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 3, i8 %x4)
4955 ;%res2 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 4, i8 -1)
4956 %res3 = fadd <2 x double> %res, %res1
4957 ;%res4 = fadd <2 x double> %res3, %res2
4958 ret <2 x double> %res3
4961 declare <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double>, <4 x double>, <4 x i64>, i32, i8)
4963 define <4 x double>@test_int_x86_avx512_mask_fixupimm_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i8 %x4) {
4964 ; X86-LABEL: test_int_x86_avx512_mask_fixupimm_pd_256:
4966 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4967 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4968 ; X86-NEXT: vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
4969 ; X86-NEXT: vfixupimmpd $4, %ymm2, %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x54,0xda,0x04]
4970 ; X86-NEXT: vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4]
4971 ; X86-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xe2,0x05]
4972 ; X86-NEXT: vaddpd %ymm4, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xdc]
4973 ; X86-NEXT: vfixupimmpd $3, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03]
4974 ; X86-NEXT: vaddpd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
4975 ; X86-NEXT: retl # encoding: [0xc3]
4977 ; X64-LABEL: test_int_x86_avx512_mask_fixupimm_pd_256:
4979 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4980 ; X64-NEXT: vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
4981 ; X64-NEXT: vfixupimmpd $4, %ymm2, %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x54,0xda,0x04]
4982 ; X64-NEXT: vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4]
4983 ; X64-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xe2,0x05]
4984 ; X64-NEXT: vaddpd %ymm4, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xdc]
4985 ; X64-NEXT: vfixupimmpd $3, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03]
4986 ; X64-NEXT: vaddpd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
4987 ; X64-NEXT: retq # encoding: [0xc3]
4988 %res = call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 4, i8 %x4)
4989 %res1 = call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> zeroinitializer, <4 x double> %x1, <4 x i64> %x2 , i32 5, i8 %x4)
4990 %res2 = call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 3, i8 -1)
4991 %res3 = fadd <4 x double> %res, %res1
4992 %res4 = fadd <4 x double> %res3, %res2
4993 ret <4 x double> %res4
4996 declare <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double>, <4 x double>, <4 x i64>, i32, i8)
4998 define <4 x double>@test_int_x86_avx512_maskz_fixupimm_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i8 %x4) {
4999 ; X86-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_256:
5001 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5002 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5003 ; X86-NEXT: vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
5004 ; X86-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xda,0x05]
5005 ; X86-NEXT: vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4]
5006 ; X86-NEXT: vmovapd %ymm0, %ymm5 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xe8]
5007 ; X86-NEXT: vfixupimmpd $4, %ymm4, %ymm1, %ymm5 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xec,0x04]
5008 ; X86-NEXT: vaddpd %ymm5, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xdd]
5009 ; X86-NEXT: vfixupimmpd $3, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03]
5010 ; X86-NEXT: vaddpd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
5011 ; X86-NEXT: retl # encoding: [0xc3]
5013 ; X64-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_256:
5015 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5016 ; X64-NEXT: vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
5017 ; X64-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xda,0x05]
5018 ; X64-NEXT: vxorpd %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0x57,0xe4]
5019 ; X64-NEXT: vmovapd %ymm0, %ymm5 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xe8]
5020 ; X64-NEXT: vfixupimmpd $4, %ymm4, %ymm1, %ymm5 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xec,0x04]
5021 ; X64-NEXT: vaddpd %ymm5, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xdd]
5022 ; X64-NEXT: vfixupimmpd $3, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03]
5023 ; X64-NEXT: vaddpd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
5024 ; X64-NEXT: retq # encoding: [0xc3]
5025 %res = call <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 5, i8 %x4)
5026 %res1 = call <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> zeroinitializer, i32 4, i8 %x4)
5027 %res2 = call <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 3, i8 -1)
5028 %res3 = fadd <4 x double> %res, %res1
5029 %res4 = fadd <4 x double> %res3, %res2
5030 ret <4 x double> %res4
5033 declare <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float>, <4 x float>, <4 x i32>, i32, i8)
5035 define <4 x float>@test_int_x86_avx512_mask_fixupimm_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) {
5036 ; X86-LABEL: test_int_x86_avx512_mask_fixupimm_ps_128:
5038 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5039 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5040 ; X86-NEXT: vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
5041 ; X86-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x54,0xda,0x05]
5042 ; X86-NEXT: vxorps %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd8,0x57,0xe4]
5043 ; X86-NEXT: vmovaps %xmm0, %xmm5 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe8]
5044 ; X86-NEXT: vfixupimmps $6, %xmm4, %xmm1, %xmm5 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x54,0xec,0x06]
5045 ; X86-NEXT: vaddps %xmm5, %xmm3, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xdd]
5046 ; X86-NEXT: vfixupimmps $7, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0x75,0x08,0x54,0xc2,0x07]
5047 ; X86-NEXT: vaddps %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0]
5048 ; X86-NEXT: retl # encoding: [0xc3]
5050 ; X64-LABEL: test_int_x86_avx512_mask_fixupimm_ps_128:
5052 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5053 ; X64-NEXT: vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
5054 ; X64-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x54,0xda,0x05]
5055 ; X64-NEXT: vxorps %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd8,0x57,0xe4]
5056 ; X64-NEXT: vmovaps %xmm0, %xmm5 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe8]
5057 ; X64-NEXT: vfixupimmps $6, %xmm4, %xmm1, %xmm5 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x54,0xec,0x06]
5058 ; X64-NEXT: vaddps %xmm5, %xmm3, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xdd]
5059 ; X64-NEXT: vfixupimmps $7, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0x75,0x08,0x54,0xc2,0x07]
5060 ; X64-NEXT: vaddps %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0]
5061 ; X64-NEXT: retq # encoding: [0xc3]
5062 %res = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4)
5063 %res1 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 6, i8 %x4)
5064 %res2 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 7, i8 -1)
5065 %res3 = fadd <4 x float> %res, %res1
5066 %res4 = fadd <4 x float> %res3, %res2
5067 ret <4 x float> %res4
5070 declare <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float>, <4 x float>, <4 x i32>, i32, i8)
5072 define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) {
5073 ; X86-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_128:
5075 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5076 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5077 ; X86-NEXT: vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
5078 ; X86-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x54,0xda,0x05]
5079 ; X86-NEXT: vxorps %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd8,0x57,0xe4]
5080 ; X86-NEXT: vmovaps %xmm0, %xmm5 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe8]
5081 ; X86-NEXT: vfixupimmps $6, %xmm4, %xmm1, %xmm5 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x54,0xec,0x06]
5082 ; X86-NEXT: vaddps %xmm5, %xmm3, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xdd]
5083 ; X86-NEXT: vfixupimmps $7, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0x75,0x08,0x54,0xc2,0x07]
5084 ; X86-NEXT: vaddps %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0]
5085 ; X86-NEXT: retl # encoding: [0xc3]
5087 ; X64-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_128:
5089 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5090 ; X64-NEXT: vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
5091 ; X64-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x54,0xda,0x05]
5092 ; X64-NEXT: vxorps %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd8,0x57,0xe4]
5093 ; X64-NEXT: vmovaps %xmm0, %xmm5 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe8]
5094 ; X64-NEXT: vfixupimmps $6, %xmm4, %xmm1, %xmm5 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x54,0xec,0x06]
5095 ; X64-NEXT: vaddps %xmm5, %xmm3, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xdd]
5096 ; X64-NEXT: vfixupimmps $7, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0x75,0x08,0x54,0xc2,0x07]
5097 ; X64-NEXT: vaddps %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0]
5098 ; X64-NEXT: retq # encoding: [0xc3]
5099 %res = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4)
5100 %res1 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 6, i8 %x4)
5101 %res2 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 7, i8 -1)
5102 %res3 = fadd <4 x float> %res, %res1
5103 %res4 = fadd <4 x float> %res3, %res2
5104 ret <4 x float> %res4
5107 declare <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float>, <8 x float>, <8 x i32>, i32, i8)
5109 define <8 x float>@test_int_x86_avx512_mask_fixupimm_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i8 %x4) {
5110 ; X86-LABEL: test_int_x86_avx512_mask_fixupimm_ps_256:
5112 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5113 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5114 ; X86-NEXT: vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
5115 ; X86-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x54,0xda,0x05]
5116 ; X86-NEXT: vxorps %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd8,0x57,0xe4]
5117 ; X86-NEXT: vmovaps %ymm0, %ymm5 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe8]
5118 ; X86-NEXT: vfixupimmps $6, %ymm4, %ymm1, %ymm5 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x54,0xec,0x06]
5119 ; X86-NEXT: vaddps %ymm5, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xdd]
5120 ; X86-NEXT: vfixupimmps $7, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0x75,0x28,0x54,0xc2,0x07]
5121 ; X86-NEXT: vaddps %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0]
5122 ; X86-NEXT: retl # encoding: [0xc3]
5124 ; X64-LABEL: test_int_x86_avx512_mask_fixupimm_ps_256:
5126 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5127 ; X64-NEXT: vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
5128 ; X64-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x54,0xda,0x05]
5129 ; X64-NEXT: vxorps %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd8,0x57,0xe4]
5130 ; X64-NEXT: vmovaps %ymm0, %ymm5 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe8]
5131 ; X64-NEXT: vfixupimmps $6, %ymm4, %ymm1, %ymm5 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x54,0xec,0x06]
5132 ; X64-NEXT: vaddps %ymm5, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xdd]
5133 ; X64-NEXT: vfixupimmps $7, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0x75,0x28,0x54,0xc2,0x07]
5134 ; X64-NEXT: vaddps %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0]
5135 ; X64-NEXT: retq # encoding: [0xc3]
5136 %res = call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 %x4)
5137 %res1 = call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> zeroinitializer, i32 6, i8 %x4)
5138 %res2 = call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 7, i8 -1)
5139 %res3 = fadd <8 x float> %res, %res1
5140 %res4 = fadd <8 x float> %res3, %res2
5141 ret <8 x float> %res4
5144 declare <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float>, <8 x float>, <8 x i32>, i32, i8)
5146 define <8 x float>@test_int_x86_avx512_maskz_fixupimm_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i8 %x4) {
5147 ; X86-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_256:
5149 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5150 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5151 ; X86-NEXT: vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
5152 ; X86-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x54,0xda,0x05]
5153 ; X86-NEXT: vxorps %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd8,0x57,0xe4]
5154 ; X86-NEXT: vmovaps %ymm0, %ymm5 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe8]
5155 ; X86-NEXT: vfixupimmps $6, %ymm4, %ymm1, %ymm5 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x54,0xec,0x06]
5156 ; X86-NEXT: vaddps %ymm5, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xdd]
5157 ; X86-NEXT: vfixupimmps $7, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0x75,0x28,0x54,0xc2,0x07]
5158 ; X86-NEXT: vaddps %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0]
5159 ; X86-NEXT: retl # encoding: [0xc3]
5161 ; X64-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_256:
5163 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5164 ; X64-NEXT: vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
5165 ; X64-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x54,0xda,0x05]
5166 ; X64-NEXT: vxorps %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd8,0x57,0xe4]
5167 ; X64-NEXT: vmovaps %ymm0, %ymm5 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe8]
5168 ; X64-NEXT: vfixupimmps $6, %ymm4, %ymm1, %ymm5 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x54,0xec,0x06]
5169 ; X64-NEXT: vaddps %ymm5, %ymm3, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xdd]
5170 ; X64-NEXT: vfixupimmps $7, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0x75,0x28,0x54,0xc2,0x07]
5171 ; X64-NEXT: vaddps %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0]
5172 ; X64-NEXT: retq # encoding: [0xc3]
5173 %res = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 %x4)
5174 %res1 = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> zeroinitializer, i32 6, i8 %x4)
5175 %res2 = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 7, i8 -1)
5176 %res3 = fadd <8 x float> %res, %res1
5177 %res4 = fadd <8 x float> %res3, %res2
5178 ret <8 x float> %res4
5181 define <2 x i64> @test_x86_avx512_psra_q_128(<2 x i64> %a0, <2 x i64> %a1) {
5182 ; CHECK-LABEL: test_x86_avx512_psra_q_128:
5184 ; CHECK-NEXT: vpsraq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0xe2,0xc1]
5185 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5186 %res = call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
5189 define <2 x i64> @test_x86_avx512_mask_psra_q_128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %passthru, i8 %mask) {
5190 ; X86-LABEL: test_x86_avx512_mask_psra_q_128:
5192 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5193 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5194 ; X86-NEXT: vpsraq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe2,0xd1]
5195 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
5196 ; X86-NEXT: retl # encoding: [0xc3]
5198 ; X64-LABEL: test_x86_avx512_mask_psra_q_128:
5200 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5201 ; X64-NEXT: vpsraq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe2,0xd1]
5202 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
5203 ; X64-NEXT: retq # encoding: [0xc3]
5204 %res = call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
5205 %mask.cast = bitcast i8 %mask to <8 x i1>
5206 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
5207 %res2 = select <2 x i1> %mask.extract, <2 x i64> %res, <2 x i64> %passthru
5210 define <2 x i64> @test_x86_avx512_maskz_psra_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
5211 ; X86-LABEL: test_x86_avx512_maskz_psra_q_128:
5213 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5214 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5215 ; X86-NEXT: vpsraq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xe2,0xc1]
5216 ; X86-NEXT: retl # encoding: [0xc3]
5218 ; X64-LABEL: test_x86_avx512_maskz_psra_q_128:
5220 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5221 ; X64-NEXT: vpsraq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xe2,0xc1]
5222 ; X64-NEXT: retq # encoding: [0xc3]
5223 %res = call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
5224 %mask.cast = bitcast i8 %mask to <8 x i1>
5225 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
5226 %res2 = select <2 x i1> %mask.extract, <2 x i64> %res, <2 x i64> zeroinitializer
5229 declare <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64>, <2 x i64>) nounwind readnone
5232 define <4 x i64> @test_x86_avx512_psra_q_256(<4 x i64> %a0, <2 x i64> %a1) {
5233 ; CHECK-LABEL: test_x86_avx512_psra_q_256:
5235 ; CHECK-NEXT: vpsraq %xmm1, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0xe2,0xc1]
5236 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5237 %res = call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
5240 define <4 x i64> @test_x86_avx512_mask_psra_q_256(<4 x i64> %a0, <2 x i64> %a1, <4 x i64> %passthru, i8 %mask) {
5241 ; X86-LABEL: test_x86_avx512_mask_psra_q_256:
5243 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5244 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5245 ; X86-NEXT: vpsraq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe2,0xd1]
5246 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
5247 ; X86-NEXT: retl # encoding: [0xc3]
5249 ; X64-LABEL: test_x86_avx512_mask_psra_q_256:
5251 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5252 ; X64-NEXT: vpsraq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe2,0xd1]
5253 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
5254 ; X64-NEXT: retq # encoding: [0xc3]
5255 %res = call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
5256 %mask.cast = bitcast i8 %mask to <8 x i1>
5257 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5258 %res2 = select <4 x i1> %mask.extract, <4 x i64> %res, <4 x i64> %passthru
5261 define <4 x i64> @test_x86_avx512_maskz_psra_q_256(<4 x i64> %a0, <2 x i64> %a1, <4 x i64> %passthru, i8 %mask) {
5262 ; X86-LABEL: test_x86_avx512_maskz_psra_q_256:
5264 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5265 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5266 ; X86-NEXT: vpsraq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xe2,0xc1]
5267 ; X86-NEXT: retl # encoding: [0xc3]
5269 ; X64-LABEL: test_x86_avx512_maskz_psra_q_256:
5271 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5272 ; X64-NEXT: vpsraq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xe2,0xc1]
5273 ; X64-NEXT: retq # encoding: [0xc3]
5274 %res = call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
5275 %mask.cast = bitcast i8 %mask to <8 x i1>
5276 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5277 %res2 = select <4 x i1> %mask.extract, <4 x i64> %res, <4 x i64> zeroinitializer
5280 declare <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64>, <2 x i64>) nounwind readnone
5283 define <2 x i64> @test_x86_avx512_psrai_q_128(<2 x i64> %a0) {
5284 ; CHECK-LABEL: test_x86_avx512_psrai_q_128:
5286 ; CHECK-NEXT: vpsraq $7, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xe0,0x07]
5287 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5288 %res = call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
5291 define <2 x i64> @test_x86_avx512_mask_psrai_q_128(<2 x i64> %a0, <2 x i64> %passthru, i8 %mask) {
5292 ; X86-LABEL: test_x86_avx512_mask_psrai_q_128:
5294 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5295 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5296 ; X86-NEXT: vpsraq $7, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xe0,0x07]
5297 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
5298 ; X86-NEXT: retl # encoding: [0xc3]
5300 ; X64-LABEL: test_x86_avx512_mask_psrai_q_128:
5302 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5303 ; X64-NEXT: vpsraq $7, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xe0,0x07]
5304 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
5305 ; X64-NEXT: retq # encoding: [0xc3]
5306 %res = call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
5307 %mask.cast = bitcast i8 %mask to <8 x i1>
5308 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
5309 %res2 = select <2 x i1> %mask.extract, <2 x i64> %res, <2 x i64> %passthru
5312 define <2 x i64> @test_x86_avx512_maskz_psrai_q_128(<2 x i64> %a0, i8 %mask) {
5313 ; X86-LABEL: test_x86_avx512_maskz_psrai_q_128:
5315 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5316 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5317 ; X86-NEXT: vpsraq $7, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xe0,0x07]
5318 ; X86-NEXT: retl # encoding: [0xc3]
5320 ; X64-LABEL: test_x86_avx512_maskz_psrai_q_128:
5322 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5323 ; X64-NEXT: vpsraq $7, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xe0,0x07]
5324 ; X64-NEXT: retq # encoding: [0xc3]
5325 %res = call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
5326 %mask.cast = bitcast i8 %mask to <8 x i1>
5327 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
5328 %res2 = select <2 x i1> %mask.extract, <2 x i64> %res, <2 x i64> zeroinitializer
5331 declare <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64>, i32) nounwind readnone
5334 define <4 x i64> @test_x86_avx512_psrai_q_256(<4 x i64> %a0) {
5335 ; CHECK-LABEL: test_x86_avx512_psrai_q_256:
5337 ; CHECK-NEXT: vpsraq $7, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xe0,0x07]
5338 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5339 %res = call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
5342 define <4 x i64> @test_x86_avx512_mask_psrai_q_256(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
5343 ; X86-LABEL: test_x86_avx512_mask_psrai_q_256:
5345 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5346 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5347 ; X86-NEXT: vpsraq $7, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xe0,0x07]
5348 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
5349 ; X86-NEXT: retl # encoding: [0xc3]
5351 ; X64-LABEL: test_x86_avx512_mask_psrai_q_256:
5353 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5354 ; X64-NEXT: vpsraq $7, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xe0,0x07]
5355 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
5356 ; X64-NEXT: retq # encoding: [0xc3]
5357 %res = call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
5358 %mask.cast = bitcast i8 %mask to <8 x i1>
5359 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5360 %res2 = select <4 x i1> %mask.extract, <4 x i64> %res, <4 x i64> %passthru
5363 define <4 x i64> @test_x86_avx512_maskz_psrai_q_256(<4 x i64> %a0, i8 %mask) {
5364 ; X86-LABEL: test_x86_avx512_maskz_psrai_q_256:
5366 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5367 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5368 ; X86-NEXT: vpsraq $7, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xe0,0x07]
5369 ; X86-NEXT: retl # encoding: [0xc3]
5371 ; X64-LABEL: test_x86_avx512_maskz_psrai_q_256:
5373 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5374 ; X64-NEXT: vpsraq $7, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xe0,0x07]
5375 ; X64-NEXT: retq # encoding: [0xc3]
5376 %res = call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
5377 %mask.cast = bitcast i8 %mask to <8 x i1>
5378 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5379 %res2 = select <4 x i1> %mask.extract, <4 x i64> %res, <4 x i64> zeroinitializer
5382 declare <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64>, i32) nounwind readnone
5384 define <2 x i64> @test_x86_avx512_psrav_q_128(<2 x i64> %a0, <2 x i64> %a1) {
5385 ; CHECK-LABEL: test_x86_avx512_psrav_q_128:
5387 ; CHECK-NEXT: vpsravq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x46,0xc1]
5388 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5389 %res = call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %a0, <2 x i64> %a1)
5393 define <2 x i64> @test_x86_avx512_mask_psrav_q_128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, i8 %mask) {
5394 ; X86-LABEL: test_x86_avx512_mask_psrav_q_128:
5396 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5397 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5398 ; X86-NEXT: vpsravq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x46,0xd1]
5399 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
5400 ; X86-NEXT: retl # encoding: [0xc3]
5402 ; X64-LABEL: test_x86_avx512_mask_psrav_q_128:
5404 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5405 ; X64-NEXT: vpsravq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x46,0xd1]
5406 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
5407 ; X64-NEXT: retq # encoding: [0xc3]
5408 %res = call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %a0, <2 x i64> %a1)
5409 %mask.cast = bitcast i8 %mask to <8 x i1>
5410 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
5411 %res2 = select <2 x i1> %mask.extract, <2 x i64> %res, <2 x i64> %a2
5415 define <2 x i64> @test_x86_avx512_maskz_psrav_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
5416 ; X86-LABEL: test_x86_avx512_maskz_psrav_q_128:
5418 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5419 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5420 ; X86-NEXT: vpsravq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x46,0xc1]
5421 ; X86-NEXT: retl # encoding: [0xc3]
5423 ; X64-LABEL: test_x86_avx512_maskz_psrav_q_128:
5425 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5426 ; X64-NEXT: vpsravq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x46,0xc1]
5427 ; X64-NEXT: retq # encoding: [0xc3]
5428 %res = call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %a0, <2 x i64> %a1)
5429 %mask.cast = bitcast i8 %mask to <8 x i1>
5430 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
5431 %res2 = select <2 x i1> %mask.extract, <2 x i64> %res, <2 x i64> zeroinitializer
5435 declare <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64>, <2 x i64>) nounwind readnone
5437 define <4 x i64> @test_x86_avx512_psrav_q_256(<4 x i64> %a0, <4 x i64> %a1) {
5438 ; CHECK-LABEL: test_x86_avx512_psrav_q_256:
5440 ; CHECK-NEXT: vpsravq %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x46,0xc1]
5441 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5442 %res = call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %a0, <4 x i64> %a1)
5446 define <4 x i64> @test_x86_avx512_mask_psrav_q_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2, i8 %mask) {
5447 ; X86-LABEL: test_x86_avx512_mask_psrav_q_256:
5449 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5450 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5451 ; X86-NEXT: vpsravq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x46,0xd1]
5452 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
5453 ; X86-NEXT: retl # encoding: [0xc3]
5455 ; X64-LABEL: test_x86_avx512_mask_psrav_q_256:
5457 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5458 ; X64-NEXT: vpsravq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x46,0xd1]
5459 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
5460 ; X64-NEXT: retq # encoding: [0xc3]
5461 %res = call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %a0, <4 x i64> %a1)
5462 %mask.cast = bitcast i8 %mask to <8 x i1>
5463 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5464 %res2 = select <4 x i1> %mask.extract, <4 x i64> %res, <4 x i64> %a2
5468 define <4 x i64> @test_x86_avx512_maskz_psrav_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
5469 ; X86-LABEL: test_x86_avx512_maskz_psrav_q_256:
5471 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5472 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5473 ; X86-NEXT: vpsravq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x46,0xc1]
5474 ; X86-NEXT: retl # encoding: [0xc3]
5476 ; X64-LABEL: test_x86_avx512_maskz_psrav_q_256:
5478 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5479 ; X64-NEXT: vpsravq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x46,0xc1]
5480 ; X64-NEXT: retq # encoding: [0xc3]
5481 %res = call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %a0, <4 x i64> %a1)
5482 %mask.cast = bitcast i8 %mask to <8 x i1>
5483 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5484 %res2 = select <4 x i1> %mask.extract, <4 x i64> %res, <4 x i64> zeroinitializer
5488 declare <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64>, <4 x i64>) nounwind readnone
5490 define <8 x float> @test_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
5491 ; CHECK-LABEL: test_vfmadd256_ps:
5493 ; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
5494 ; CHECK-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
5495 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5496 %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
5500 define <8 x float> @test_mask_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
5501 ; X86-LABEL: test_mask_vfmadd256_ps:
5503 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5504 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5505 ; X86-NEXT: vfmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x98,0xc1]
5506 ; X86-NEXT: # ymm0 {%k1} = (ymm0 * ymm1) + ymm2
5507 ; X86-NEXT: retl # encoding: [0xc3]
5509 ; X64-LABEL: test_mask_vfmadd256_ps:
5511 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5512 ; X64-NEXT: vfmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x98,0xc1]
5513 ; X64-NEXT: # ymm0 {%k1} = (ymm0 * ymm1) + ymm2
5514 ; X64-NEXT: retq # encoding: [0xc3]
5515 %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
5516 %2 = bitcast i8 %mask to <8 x i1>
5517 %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %a0
5521 define <4 x float> @test_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
5522 ; CHECK-LABEL: test_vfmadd128_ps:
5524 ; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
5525 ; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
5526 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5527 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
5531 define <4 x float> @test_mask_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
5532 ; X86-LABEL: test_mask_vfmadd128_ps:
5534 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5535 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5536 ; X86-NEXT: vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1]
5537 ; X86-NEXT: # xmm0 {%k1} = (xmm0 * xmm1) + xmm2
5538 ; X86-NEXT: retl # encoding: [0xc3]
5540 ; X64-LABEL: test_mask_vfmadd128_ps:
5542 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5543 ; X64-NEXT: vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1]
5544 ; X64-NEXT: # xmm0 {%k1} = (xmm0 * xmm1) + xmm2
5545 ; X64-NEXT: retq # encoding: [0xc3]
5546 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
5547 %2 = bitcast i8 %mask to <8 x i1>
5548 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5549 %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %a0
5553 define <4 x double> @test_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
5554 ; CHECK-LABEL: test_fmadd256_pd:
5556 ; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
5557 ; CHECK-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
5558 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5559 %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c)
5563 define <4 x double> @test_mask_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) {
5564 ; X86-LABEL: test_mask_fmadd256_pd:
5566 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5567 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5568 ; X86-NEXT: vfmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x98,0xc1]
5569 ; X86-NEXT: # ymm0 {%k1} = (ymm0 * ymm1) + ymm2
5570 ; X86-NEXT: retl # encoding: [0xc3]
5572 ; X64-LABEL: test_mask_fmadd256_pd:
5574 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5575 ; X64-NEXT: vfmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x98,0xc1]
5576 ; X64-NEXT: # ymm0 {%k1} = (ymm0 * ymm1) + ymm2
5577 ; X64-NEXT: retq # encoding: [0xc3]
5578 %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c)
5579 %2 = bitcast i8 %mask to <8 x i1>
5580 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5581 %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %a
5585 define <2 x double> @test_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
5586 ; CHECK-LABEL: test_fmadd128_pd:
5588 ; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
5589 ; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
5590 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5591 %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
5595 define <2 x double> @test_mask_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
5596 ; X86-LABEL: test_mask_fmadd128_pd:
5598 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5599 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5600 ; X86-NEXT: vfmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x98,0xc1]
5601 ; X86-NEXT: # xmm0 {%k1} = (xmm0 * xmm1) + xmm2
5602 ; X86-NEXT: retl # encoding: [0xc3]
5604 ; X64-LABEL: test_mask_fmadd128_pd:
5606 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5607 ; X64-NEXT: vfmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x98,0xc1]
5608 ; X64-NEXT: # xmm0 {%k1} = (xmm0 * xmm1) + xmm2
5609 ; X64-NEXT: retq # encoding: [0xc3]
5610 %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
5611 %2 = bitcast i8 %mask to <8 x i1>
5612 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
5613 %3 = select <2 x i1> %extract, <2 x double> %1, <2 x double> %a
5617 define <2 x double>@test_int_x86_avx512_mask3_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
5618 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128:
5620 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5621 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5622 ; X86-NEXT: vfmadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd1]
5623 ; X86-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) + xmm2
5624 ; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
5625 ; X86-NEXT: retl # encoding: [0xc3]
5627 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128:
5629 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5630 ; X64-NEXT: vfmadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd1]
5631 ; X64-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) + xmm2
5632 ; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
5633 ; X64-NEXT: retq # encoding: [0xc3]
5634 %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2)
5635 %2 = bitcast i8 %x3 to <8 x i1>
5636 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
5637 %3 = select <2 x i1> %extract, <2 x double> %1, <2 x double> %x2
5641 define <2 x double>@test_int_x86_avx512_maskz_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
5642 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128:
5644 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5645 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5646 ; X86-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa8,0xc2]
5647 ; X86-NEXT: # xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
5648 ; X86-NEXT: retl # encoding: [0xc3]
5650 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128:
5652 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5653 ; X64-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa8,0xc2]
5654 ; X64-NEXT: # xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
5655 ; X64-NEXT: retq # encoding: [0xc3]
5656 %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2)
5657 %2 = bitcast i8 %x3 to <8 x i1>
5658 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
5659 %3 = select <2 x i1> %extract, <2 x double> %1, <2 x double> zeroinitializer
5663 define <4 x double>@test_int_x86_avx512_mask3_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
5664 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256:
5666 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5667 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5668 ; X86-NEXT: vfmadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd1]
5669 ; X86-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) + ymm2
5670 ; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
5671 ; X86-NEXT: retl # encoding: [0xc3]
5673 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256:
5675 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5676 ; X64-NEXT: vfmadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd1]
5677 ; X64-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) + ymm2
5678 ; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
5679 ; X64-NEXT: retq # encoding: [0xc3]
5680 %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2)
5681 %2 = bitcast i8 %x3 to <8 x i1>
5682 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5683 %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %x2
5687 define <4 x double>@test_int_x86_avx512_maskz_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
5688 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256:
5690 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5691 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5692 ; X86-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa8,0xc2]
5693 ; X86-NEXT: # ymm0 {%k1} {z} = (ymm1 * ymm0) + ymm2
5694 ; X86-NEXT: retl # encoding: [0xc3]
5696 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256:
5698 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5699 ; X64-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa8,0xc2]
5700 ; X64-NEXT: # ymm0 {%k1} {z} = (ymm1 * ymm0) + ymm2
5701 ; X64-NEXT: retq # encoding: [0xc3]
5702 %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2)
5703 %2 = bitcast i8 %x3 to <8 x i1>
5704 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5705 %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> zeroinitializer
5709 define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
5710 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128:
5712 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5713 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5714 ; X86-NEXT: vfmadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd1]
5715 ; X86-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) + xmm2
5716 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
5717 ; X86-NEXT: retl # encoding: [0xc3]
5719 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128:
5721 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5722 ; X64-NEXT: vfmadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd1]
5723 ; X64-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) + xmm2
5724 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
5725 ; X64-NEXT: retq # encoding: [0xc3]
5726 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2)
5727 %2 = bitcast i8 %x3 to <8 x i1>
5728 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5729 %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %x2
5733 define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
5734 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128:
5736 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5737 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5738 ; X86-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa8,0xc2]
5739 ; X86-NEXT: # xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
5740 ; X86-NEXT: retl # encoding: [0xc3]
5742 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128:
5744 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5745 ; X64-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa8,0xc2]
5746 ; X64-NEXT: # xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
5747 ; X64-NEXT: retq # encoding: [0xc3]
5748 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2)
5749 %2 = bitcast i8 %x3 to <8 x i1>
5750 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5751 %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> zeroinitializer
5755 define <8 x float>@test_int_x86_avx512_mask3_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
5756 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256:
5758 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5759 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5760 ; X86-NEXT: vfmadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd1]
5761 ; X86-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) + ymm2
5762 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
5763 ; X86-NEXT: retl # encoding: [0xc3]
5765 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256:
5767 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5768 ; X64-NEXT: vfmadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd1]
5769 ; X64-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) + ymm2
5770 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
5771 ; X64-NEXT: retq # encoding: [0xc3]
5772 %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2)
5773 %2 = bitcast i8 %x3 to <8 x i1>
5774 %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %x2
5778 define <8 x float>@test_int_x86_avx512_maskz_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
5779 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256:
5781 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5782 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5783 ; X86-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa8,0xc2]
5784 ; X86-NEXT: # ymm0 {%k1} {z} = (ymm1 * ymm0) + ymm2
5785 ; X86-NEXT: retl # encoding: [0xc3]
5787 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256:
5789 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5790 ; X64-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa8,0xc2]
5791 ; X64-NEXT: # ymm0 {%k1} {z} = (ymm1 * ymm0) + ymm2
5792 ; X64-NEXT: retq # encoding: [0xc3]
5793 %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2)
5794 %2 = bitcast i8 %x3 to <8 x i1>
5795 %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> zeroinitializer
5799 define <2 x double>@test_int_x86_avx512_mask3_vfmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
5800 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128:
5802 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5803 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5804 ; X86-NEXT: vfmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd1]
5805 ; X86-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) - xmm2
5806 ; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
5807 ; X86-NEXT: retl # encoding: [0xc3]
5809 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128:
5811 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5812 ; X64-NEXT: vfmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd1]
5813 ; X64-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) - xmm2
5814 ; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
5815 ; X64-NEXT: retq # encoding: [0xc3]
5816 %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2
5817 %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %1)
5818 %3 = bitcast i8 %x3 to <8 x i1>
5819 %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <2 x i32> <i32 0, i32 1>
5820 %4 = select <2 x i1> %extract, <2 x double> %2, <2 x double> %x2
5824 define <4 x double>@test_int_x86_avx512_mask3_vfmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
5825 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256:
5827 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5828 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5829 ; X86-NEXT: vfmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd1]
5830 ; X86-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) - ymm2
5831 ; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
5832 ; X86-NEXT: retl # encoding: [0xc3]
5834 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256:
5836 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5837 ; X64-NEXT: vfmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd1]
5838 ; X64-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) - ymm2
5839 ; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
5840 ; X64-NEXT: retq # encoding: [0xc3]
5841 %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
5842 %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %1)
5843 %3 = bitcast i8 %x3 to <8 x i1>
5844 %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5845 %4 = select <4 x i1> %extract, <4 x double> %2, <4 x double> %x2
5849 define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
5850 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128:
5852 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5853 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5854 ; X86-NEXT: vfmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd1]
5855 ; X86-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) - xmm2
5856 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
5857 ; X86-NEXT: retl # encoding: [0xc3]
5859 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128:
5861 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5862 ; X64-NEXT: vfmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd1]
5863 ; X64-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) - xmm2
5864 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
5865 ; X64-NEXT: retq # encoding: [0xc3]
5866 %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
5867 %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %1)
5868 %3 = bitcast i8 %x3 to <8 x i1>
5869 %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5870 %4 = select <4 x i1> %extract, <4 x float> %2, <4 x float> %x2
5874 define <8 x float>@test_int_x86_avx512_mask3_vfmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
5875 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256:
5877 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5878 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5879 ; X86-NEXT: vfmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd1]
5880 ; X86-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) - ymm2
5881 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
5882 ; X86-NEXT: retl # encoding: [0xc3]
5884 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256:
5886 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5887 ; X64-NEXT: vfmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd1]
5888 ; X64-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) - ymm2
5889 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
5890 ; X64-NEXT: retq # encoding: [0xc3]
5891 %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
5892 %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %1)
5893 %3 = bitcast i8 %x3 to <8 x i1>
5894 %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> %x2
5898 define <8 x float> @test_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
5899 ; CHECK-LABEL: test_vfnmadd256_ps:
5901 ; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xac,0xc2]
5902 ; CHECK-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2
5903 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5904 %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
5905 %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %1, <8 x float> %a2)
5909 define <8 x float> @test_mask_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
5910 ; X86-LABEL: test_mask_vfnmadd256_ps:
5912 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5913 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5914 ; X86-NEXT: vfnmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9c,0xc1]
5915 ; X86-NEXT: # ymm0 {%k1} = -(ymm0 * ymm1) + ymm2
5916 ; X86-NEXT: retl # encoding: [0xc3]
5918 ; X64-LABEL: test_mask_vfnmadd256_ps:
5920 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5921 ; X64-NEXT: vfnmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9c,0xc1]
5922 ; X64-NEXT: # ymm0 {%k1} = -(ymm0 * ymm1) + ymm2
5923 ; X64-NEXT: retq # encoding: [0xc3]
5924 %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
5925 %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %1, <8 x float> %a2)
5926 %3 = bitcast i8 %mask to <8 x i1>
5927 %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> %a0
5931 define <4 x float> @test_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
5932 ; CHECK-LABEL: test_vfnmadd128_ps:
5934 ; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xac,0xc2]
5935 ; CHECK-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
5936 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5937 %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
5938 %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %1, <4 x float> %a2)
5942 define <4 x float> @test_mask_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
5943 ; X86-LABEL: test_mask_vfnmadd128_ps:
5945 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5946 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5947 ; X86-NEXT: vfnmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9c,0xc1]
5948 ; X86-NEXT: # xmm0 {%k1} = -(xmm0 * xmm1) + xmm2
5949 ; X86-NEXT: retl # encoding: [0xc3]
5951 ; X64-LABEL: test_mask_vfnmadd128_ps:
5953 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5954 ; X64-NEXT: vfnmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9c,0xc1]
5955 ; X64-NEXT: # xmm0 {%k1} = -(xmm0 * xmm1) + xmm2
5956 ; X64-NEXT: retq # encoding: [0xc3]
5957 %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
5958 %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %1, <4 x float> %a2)
5959 %3 = bitcast i8 %mask to <8 x i1>
5960 %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5961 %4 = select <4 x i1> %extract, <4 x float> %2, <4 x float> %a0
5965 define <4 x double> @test_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
5966 ; CHECK-LABEL: test_vfnmadd256_pd:
5968 ; CHECK-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xac,0xc2]
5969 ; CHECK-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2
5970 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5971 %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
5972 %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %1, <4 x double> %a2)
5976 define <4 x double> @test_mask_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
5977 ; X86-LABEL: test_mask_vfnmadd256_pd:
5979 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5980 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5981 ; X86-NEXT: vfnmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9c,0xc1]
5982 ; X86-NEXT: # ymm0 {%k1} = -(ymm0 * ymm1) + ymm2
5983 ; X86-NEXT: retl # encoding: [0xc3]
5985 ; X64-LABEL: test_mask_vfnmadd256_pd:
5987 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5988 ; X64-NEXT: vfnmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9c,0xc1]
5989 ; X64-NEXT: # ymm0 {%k1} = -(ymm0 * ymm1) + ymm2
5990 ; X64-NEXT: retq # encoding: [0xc3]
5991 %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
5992 %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %1, <4 x double> %a2)
5993 %3 = bitcast i8 %mask to <8 x i1>
5994 %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5995 %4 = select <4 x i1> %extract, <4 x double> %2, <4 x double> %a0
5999 define <2 x double> @test_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
6000 ; CHECK-LABEL: test_vfnmadd128_pd:
6002 ; CHECK-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xac,0xc2]
6003 ; CHECK-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
6004 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6005 %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a1
6006 %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %1, <2 x double> %a2)
6010 define <2 x double> @test_mask_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
6011 ; X86-LABEL: test_mask_vfnmadd128_pd:
6013 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6014 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6015 ; X86-NEXT: vfnmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9c,0xc1]
6016 ; X86-NEXT: # xmm0 {%k1} = -(xmm0 * xmm1) + xmm2
6017 ; X86-NEXT: retl # encoding: [0xc3]
6019 ; X64-LABEL: test_mask_vfnmadd128_pd:
6021 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6022 ; X64-NEXT: vfnmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9c,0xc1]
6023 ; X64-NEXT: # xmm0 {%k1} = -(xmm0 * xmm1) + xmm2
6024 ; X64-NEXT: retq # encoding: [0xc3]
6025 %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a1
6026 %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %1, <2 x double> %a2)
6027 %3 = bitcast i8 %mask to <8 x i1>
6028 %extract = shufflevector <8 x i1> %3, <8 x i1> %3, <2 x i32> <i32 0, i32 1>
6029 %4 = select <2 x i1> %extract, <2 x double> %2, <2 x double> %a0
6033 define <8 x float> @test_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
6034 ; CHECK-LABEL: test_vfnmsub256_ps:
6036 ; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xae,0xc2]
6037 ; CHECK-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2
6038 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6039 %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
6040 %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
6041 %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %1, <8 x float> %2)
6045 define <8 x float> @test_mask_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
6046 ; X86-LABEL: test_mask_vfnmsub256_ps:
6048 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6049 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6050 ; X86-NEXT: vfnmsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9e,0xc1]
6051 ; X86-NEXT: # ymm0 {%k1} = -(ymm0 * ymm1) - ymm2
6052 ; X86-NEXT: retl # encoding: [0xc3]
6054 ; X64-LABEL: test_mask_vfnmsub256_ps:
6056 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6057 ; X64-NEXT: vfnmsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9e,0xc1]
6058 ; X64-NEXT: # ymm0 {%k1} = -(ymm0 * ymm1) - ymm2
6059 ; X64-NEXT: retq # encoding: [0xc3]
6060 %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
6061 %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
6062 %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %1, <8 x float> %2)
6063 %4 = bitcast i8 %mask to <8 x i1>
6064 %5 = select <8 x i1> %4, <8 x float> %3, <8 x float> %a0
6068 define <4 x float> @test_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
6069 ; CHECK-LABEL: test_vfnmsub128_ps:
6071 ; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xae,0xc2]
6072 ; CHECK-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
6073 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6074 %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
6075 %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
6076 %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %1, <4 x float> %2)
6080 define <4 x float> @test_mask_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
6081 ; X86-LABEL: test_mask_vfnmsub128_ps:
6083 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6084 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6085 ; X86-NEXT: vfnmsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9e,0xc1]
6086 ; X86-NEXT: # xmm0 {%k1} = -(xmm0 * xmm1) - xmm2
6087 ; X86-NEXT: retl # encoding: [0xc3]
6089 ; X64-LABEL: test_mask_vfnmsub128_ps:
6091 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6092 ; X64-NEXT: vfnmsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9e,0xc1]
6093 ; X64-NEXT: # xmm0 {%k1} = -(xmm0 * xmm1) - xmm2
6094 ; X64-NEXT: retq # encoding: [0xc3]
6095 %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
6096 %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
6097 %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %1, <4 x float> %2)
6098 %4 = bitcast i8 %mask to <8 x i1>
6099 %extract = shufflevector <8 x i1> %4, <8 x i1> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6100 %5 = select <4 x i1> %extract, <4 x float> %3, <4 x float> %a0
6104 define <4 x double> @test_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
6105 ; CHECK-LABEL: test_vfnmsub256_pd:
6107 ; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xae,0xc2]
6108 ; CHECK-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2
6109 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6110 %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
6111 %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
6112 %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %1, <4 x double> %2)
6116 define <4 x double> @test_mask_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
6117 ; X86-LABEL: test_mask_vfnmsub256_pd:
6119 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6120 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6121 ; X86-NEXT: vfnmsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9e,0xc1]
6122 ; X86-NEXT: # ymm0 {%k1} = -(ymm0 * ymm1) - ymm2
6123 ; X86-NEXT: retl # encoding: [0xc3]
6125 ; X64-LABEL: test_mask_vfnmsub256_pd:
6127 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6128 ; X64-NEXT: vfnmsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9e,0xc1]
6129 ; X64-NEXT: # ymm0 {%k1} = -(ymm0 * ymm1) - ymm2
6130 ; X64-NEXT: retq # encoding: [0xc3]
6131 %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
6132 %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
6133 %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %1, <4 x double> %2)
6134 %4 = bitcast i8 %mask to <8 x i1>
6135 %extract = shufflevector <8 x i1> %4, <8 x i1> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6136 %5 = select <4 x i1> %extract, <4 x double> %3, <4 x double> %a0
6140 define <2 x double> @test_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
6141 ; CHECK-LABEL: test_vfnmsub128_pd:
6143 ; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xae,0xc2]
6144 ; CHECK-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
6145 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6146 %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a1
6147 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
6148 %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %1, <2 x double> %2)
6152 define <2 x double> @test_mask_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
6153 ; X86-LABEL: test_mask_vfnmsub128_pd:
6155 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6156 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6157 ; X86-NEXT: vfnmsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9e,0xc1]
6158 ; X86-NEXT: # xmm0 {%k1} = -(xmm0 * xmm1) - xmm2
6159 ; X86-NEXT: retl # encoding: [0xc3]
6161 ; X64-LABEL: test_mask_vfnmsub128_pd:
6163 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6164 ; X64-NEXT: vfnmsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9e,0xc1]
6165 ; X64-NEXT: # xmm0 {%k1} = -(xmm0 * xmm1) - xmm2
6166 ; X64-NEXT: retq # encoding: [0xc3]
6167 %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a1
6168 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
6169 %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %1, <2 x double> %2)
6170 %4 = bitcast i8 %mask to <8 x i1>
6171 %extract = shufflevector <8 x i1> %4, <8 x i1> %4, <2 x i32> <i32 0, i32 1>
6172 %5 = select <2 x i1> %extract, <2 x double> %3, <2 x double> %a0
6176 define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
6177 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128:
6179 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6180 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6181 ; X86-NEXT: vfnmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd1]
6182 ; X86-NEXT: # xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
6183 ; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
6184 ; X86-NEXT: retl # encoding: [0xc3]
6186 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128:
6188 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6189 ; X64-NEXT: vfnmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd1]
6190 ; X64-NEXT: # xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
6191 ; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
6192 ; X64-NEXT: retq # encoding: [0xc3]
6193 %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x0
6194 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2
6195 %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %x1, <2 x double> %2)
6196 %4 = bitcast i8 %x3 to <8 x i1>
6197 %extract = shufflevector <8 x i1> %4, <8 x i1> %4, <2 x i32> <i32 0, i32 1>
6198 %5 = select <2 x i1> %extract, <2 x double> %3, <2 x double> %x2
6202 define <4 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
6203 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256:
6205 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6206 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6207 ; X86-NEXT: vfnmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd1]
6208 ; X86-NEXT: # ymm2 {%k1} = -(ymm0 * ymm1) - ymm2
6209 ; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
6210 ; X86-NEXT: retl # encoding: [0xc3]
6212 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256:
6214 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6215 ; X64-NEXT: vfnmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd1]
6216 ; X64-NEXT: # ymm2 {%k1} = -(ymm0 * ymm1) - ymm2
6217 ; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
6218 ; X64-NEXT: retq # encoding: [0xc3]
6219 %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x0
6220 %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
6221 %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %x1, <4 x double> %2)
6222 %4 = bitcast i8 %x3 to <8 x i1>
6223 %extract = shufflevector <8 x i1> %4, <8 x i1> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6224 %5 = select <4 x i1> %extract, <4 x double> %3, <4 x double> %x2
6228 define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
6229 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128:
6231 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6232 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6233 ; X86-NEXT: vfnmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd1]
6234 ; X86-NEXT: # xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
6235 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
6236 ; X86-NEXT: retl # encoding: [0xc3]
6238 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128:
6240 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6241 ; X64-NEXT: vfnmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd1]
6242 ; X64-NEXT: # xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
6243 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
6244 ; X64-NEXT: retq # encoding: [0xc3]
6245 %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x0
6246 %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
6247 %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %x1, <4 x float> %2)
6248 %4 = bitcast i8 %x3 to <8 x i1>
6249 %extract = shufflevector <8 x i1> %4, <8 x i1> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6250 %5 = select <4 x i1> %extract, <4 x float> %3, <4 x float> %x2
6254 define <8 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
6255 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256:
6257 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6258 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6259 ; X86-NEXT: vfnmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd1]
6260 ; X86-NEXT: # ymm2 {%k1} = -(ymm0 * ymm1) - ymm2
6261 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
6262 ; X86-NEXT: retl # encoding: [0xc3]
6264 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256:
6266 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6267 ; X64-NEXT: vfnmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd1]
6268 ; X64-NEXT: # ymm2 {%k1} = -(ymm0 * ymm1) - ymm2
6269 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
6270 ; X64-NEXT: retq # encoding: [0xc3]
6271 %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x0
6272 %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
6273 %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %x1, <8 x float> %2)
6274 %4 = bitcast i8 %x3 to <8 x i1>
6275 %5 = select <8 x i1> %4, <8 x float> %3, <8 x float> %x2
6279 define <8 x float> @test_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
6280 ; CHECK-LABEL: test_fmaddsub256_ps:
6282 ; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa6,0xc2]
6283 ; CHECK-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
6284 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6285 %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c)
6286 %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
6287 %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %2)
6288 %4 = shufflevector <8 x float> %3, <8 x float> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
6292 define <8 x float> @test_mask_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) {
6293 ; X86-LABEL: test_mask_fmaddsub256_ps:
6295 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6296 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6297 ; X86-NEXT: vfmaddsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x96,0xc1]
6298 ; X86-NEXT: # ymm0 {%k1} = (ymm0 * ymm1) +/- ymm2
6299 ; X86-NEXT: retl # encoding: [0xc3]
6301 ; X64-LABEL: test_mask_fmaddsub256_ps:
6303 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6304 ; X64-NEXT: vfmaddsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x96,0xc1]
6305 ; X64-NEXT: # ymm0 {%k1} = (ymm0 * ymm1) +/- ymm2
6306 ; X64-NEXT: retq # encoding: [0xc3]
6307 %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c)
6308 %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
6309 %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %2)
6310 %4 = shufflevector <8 x float> %3, <8 x float> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
6311 %5 = bitcast i8 %mask to <8 x i1>
6312 %6 = select <8 x i1> %5, <8 x float> %4, <8 x float> %a
6316 define <4 x float> @test_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
6317 ; CHECK-LABEL: test_fmaddsub128_ps:
6319 ; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa6,0xc2]
6320 ; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
6321 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6322 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
6323 %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
6324 %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %2)
6325 %4 = shufflevector <4 x float> %3, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
6329 define <4 x float> @test_mask_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
6330 ; X86-LABEL: test_mask_fmaddsub128_ps:
6332 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6333 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6334 ; X86-NEXT: vfmaddsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x96,0xc1]
6335 ; X86-NEXT: # xmm0 {%k1} = (xmm0 * xmm1) +/- xmm2
6336 ; X86-NEXT: retl # encoding: [0xc3]
6338 ; X64-LABEL: test_mask_fmaddsub128_ps:
6340 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6341 ; X64-NEXT: vfmaddsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x96,0xc1]
6342 ; X64-NEXT: # xmm0 {%k1} = (xmm0 * xmm1) +/- xmm2
6343 ; X64-NEXT: retq # encoding: [0xc3]
6344 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
6345 %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
6346 %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %2)
6347 %4 = shufflevector <4 x float> %3, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
6348 %5 = bitcast i8 %mask to <8 x i1>
6349 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6350 %6 = select <4 x i1> %extract, <4 x float> %4, <4 x float> %a
6354 define <4 x double> @test_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
6355 ; CHECK-LABEL: test_vfmaddsub256_pd:
6357 ; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa6,0xc2]
6358 ; CHECK-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
6359 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6360 %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
6361 %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
6362 %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2)
6363 %4 = shufflevector <4 x double> %3, <4 x double> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
6367 define <4 x double> @test_mask_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
6368 ; X86-LABEL: test_mask_vfmaddsub256_pd:
6370 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6371 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6372 ; X86-NEXT: vfmaddsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x96,0xc1]
6373 ; X86-NEXT: # ymm0 {%k1} = (ymm0 * ymm1) +/- ymm2
6374 ; X86-NEXT: retl # encoding: [0xc3]
6376 ; X64-LABEL: test_mask_vfmaddsub256_pd:
6378 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6379 ; X64-NEXT: vfmaddsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x96,0xc1]
6380 ; X64-NEXT: # ymm0 {%k1} = (ymm0 * ymm1) +/- ymm2
6381 ; X64-NEXT: retq # encoding: [0xc3]
6382 %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
6383 %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
6384 %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2)
6385 %4 = shufflevector <4 x double> %3, <4 x double> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
6386 %5 = bitcast i8 %mask to <8 x i1>
6387 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6388 %6 = select <4 x i1> %extract, <4 x double> %4, <4 x double> %a0
6392 define <2 x double> @test_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
6393 ; CHECK-LABEL: test_vfmaddsub128_pd:
6395 ; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa6,0xc2]
6396 ; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
6397 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
6398 %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
6399 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
6400 %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2)
6401 %4 = shufflevector <2 x double> %3, <2 x double> %1, <2 x i32> <i32 0, i32 3>
6405 define <2 x double> @test_mask_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
6406 ; X86-LABEL: test_mask_vfmaddsub128_pd:
6408 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6409 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6410 ; X86-NEXT: vfmaddsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x96,0xc1]
6411 ; X86-NEXT: # xmm0 {%k1} = (xmm0 * xmm1) +/- xmm2
6412 ; X86-NEXT: retl # encoding: [0xc3]
6414 ; X64-LABEL: test_mask_vfmaddsub128_pd:
6416 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6417 ; X64-NEXT: vfmaddsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x96,0xc1]
6418 ; X64-NEXT: # xmm0 {%k1} = (xmm0 * xmm1) +/- xmm2
6419 ; X64-NEXT: retq # encoding: [0xc3]
6420 %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
6421 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
6422 %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2)
6423 %4 = shufflevector <2 x double> %3, <2 x double> %1, <2 x i32> <i32 0, i32 3>
6424 %5 = bitcast i8 %mask to <8 x i1>
6425 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
6426 %6 = select <2 x i1> %extract, <2 x double> %4, <2 x double> %a0
6430 define <2 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
6431 ; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128:
6433 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6434 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6435 ; X86-NEXT: vfmaddsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd1]
6436 ; X86-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) +/- xmm2
6437 ; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
6438 ; X86-NEXT: retl # encoding: [0xc3]
6440 ; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128:
6442 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6443 ; X64-NEXT: vfmaddsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd1]
6444 ; X64-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) +/- xmm2
6445 ; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
6446 ; X64-NEXT: retq # encoding: [0xc3]
6447 %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2)
6448 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2
6449 %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %2)
6450 %4 = shufflevector <2 x double> %3, <2 x double> %1, <2 x i32> <i32 0, i32 3>
6451 %5 = bitcast i8 %x3 to <8 x i1>
6452 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
6453 %6 = select <2 x i1> %extract, <2 x double> %4, <2 x double> %x2
6457 define <2 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
6458 ; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128:
6460 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6461 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6462 ; X86-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa6,0xc2]
6463 ; X86-NEXT: # xmm0 {%k1} {z} = (xmm1 * xmm0) +/- xmm2
6464 ; X86-NEXT: retl # encoding: [0xc3]
6466 ; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128:
6468 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6469 ; X64-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa6,0xc2]
6470 ; X64-NEXT: # xmm0 {%k1} {z} = (xmm1 * xmm0) +/- xmm2
6471 ; X64-NEXT: retq # encoding: [0xc3]
6472 %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2)
6473 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2
6474 %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %2)
6475 %4 = shufflevector <2 x double> %3, <2 x double> %1, <2 x i32> <i32 0, i32 3>
6476 %5 = bitcast i8 %x3 to <8 x i1>
6477 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
6478 %6 = select <2 x i1> %extract, <2 x double> %4, <2 x double> zeroinitializer
6482 define <4 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
6483 ; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256:
6485 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6486 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6487 ; X86-NEXT: vfmaddsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd1]
6488 ; X86-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) +/- ymm2
6489 ; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
6490 ; X86-NEXT: retl # encoding: [0xc3]
6492 ; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256:
6494 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6495 ; X64-NEXT: vfmaddsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd1]
6496 ; X64-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) +/- ymm2
6497 ; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
6498 ; X64-NEXT: retq # encoding: [0xc3]
6499 %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2)
6500 %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
6501 %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %2)
6502 %4 = shufflevector <4 x double> %3, <4 x double> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
6503 %5 = bitcast i8 %x3 to <8 x i1>
6504 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6505 %6 = select <4 x i1> %extract, <4 x double> %4, <4 x double> %x2
6509 define <4 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
6510 ; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256:
6512 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6513 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6514 ; X86-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa6,0xc2]
6515 ; X86-NEXT: # ymm0 {%k1} {z} = (ymm1 * ymm0) +/- ymm2
6516 ; X86-NEXT: retl # encoding: [0xc3]
6518 ; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256:
6520 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6521 ; X64-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa6,0xc2]
6522 ; X64-NEXT: # ymm0 {%k1} {z} = (ymm1 * ymm0) +/- ymm2
6523 ; X64-NEXT: retq # encoding: [0xc3]
6524 %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2)
6525 %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
6526 %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %2)
6527 %4 = shufflevector <4 x double> %3, <4 x double> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
6528 %5 = bitcast i8 %x3 to <8 x i1>
6529 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6530 %6 = select <4 x i1> %extract, <4 x double> %4, <4 x double> zeroinitializer
6534 define <4 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
6535 ; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128:
6537 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6538 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6539 ; X86-NEXT: vfmaddsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd1]
6540 ; X86-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) +/- xmm2
6541 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
6542 ; X86-NEXT: retl # encoding: [0xc3]
6544 ; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128:
6546 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6547 ; X64-NEXT: vfmaddsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd1]
6548 ; X64-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) +/- xmm2
6549 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
6550 ; X64-NEXT: retq # encoding: [0xc3]
6551 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2)
6552 %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
6553 %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %2)
6554 %4 = shufflevector <4 x float> %3, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
6555 %5 = bitcast i8 %x3 to <8 x i1>
6556 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6557 %6 = select <4 x i1> %extract, <4 x float> %4, <4 x float> %x2
6561 define <4 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
6562 ; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128:
6564 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6565 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6566 ; X86-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa6,0xc2]
6567 ; X86-NEXT: # xmm0 {%k1} {z} = (xmm1 * xmm0) +/- xmm2
6568 ; X86-NEXT: retl # encoding: [0xc3]
6570 ; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128:
6572 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6573 ; X64-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa6,0xc2]
6574 ; X64-NEXT: # xmm0 {%k1} {z} = (xmm1 * xmm0) +/- xmm2
6575 ; X64-NEXT: retq # encoding: [0xc3]
6576 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2)
6577 %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
6578 %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %2)
6579 %4 = shufflevector <4 x float> %3, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
6580 %5 = bitcast i8 %x3 to <8 x i1>
6581 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6582 %6 = select <4 x i1> %extract, <4 x float> %4, <4 x float> zeroinitializer
6586 define <8 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
6587 ; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256:
6589 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6590 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6591 ; X86-NEXT: vfmaddsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd1]
6592 ; X86-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) +/- ymm2
6593 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
6594 ; X86-NEXT: retl # encoding: [0xc3]
6596 ; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256:
6598 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6599 ; X64-NEXT: vfmaddsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd1]
6600 ; X64-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) +/- ymm2
6601 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
6602 ; X64-NEXT: retq # encoding: [0xc3]
6603 %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2)
6604 %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
6605 %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %2)
6606 %4 = shufflevector <8 x float> %3, <8 x float> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
6607 %5 = bitcast i8 %x3 to <8 x i1>
6608 %6 = select <8 x i1> %5, <8 x float> %4, <8 x float> %x2
6612 define <8 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
6613 ; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256:
6615 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6616 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6617 ; X86-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa6,0xc2]
6618 ; X86-NEXT: # ymm0 {%k1} {z} = (ymm1 * ymm0) +/- ymm2
6619 ; X86-NEXT: retl # encoding: [0xc3]
6621 ; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256:
6623 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6624 ; X64-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa6,0xc2]
6625 ; X64-NEXT: # ymm0 {%k1} {z} = (ymm1 * ymm0) +/- ymm2
6626 ; X64-NEXT: retq # encoding: [0xc3]
6627 %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2)
6628 %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
6629 %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %2)
6630 %4 = shufflevector <8 x float> %3, <8 x float> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
6631 %5 = bitcast i8 %x3 to <8 x i1>
6632 %6 = select <8 x i1> %5, <8 x float> %4, <8 x float> zeroinitializer
6636 define <2 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
6637 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128:
6639 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6640 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6641 ; X86-NEXT: vfmsubadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd1]
6642 ; X86-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) -/+ xmm2
6643 ; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
6644 ; X86-NEXT: retl # encoding: [0xc3]
6646 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128:
6648 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6649 ; X64-NEXT: vfmsubadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd1]
6650 ; X64-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) -/+ xmm2
6651 ; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
6652 ; X64-NEXT: retq # encoding: [0xc3]
6653 %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2)
6654 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x2
6655 %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %2)
6656 %4 = shufflevector <2 x double> %1, <2 x double> %3, <2 x i32> <i32 0, i32 3>
6657 %5 = bitcast i8 %x3 to <8 x i1>
6658 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
6659 %6 = select <2 x i1> %extract, <2 x double> %4, <2 x double> %x2
6663 define <4 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
6664 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256:
6666 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6667 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6668 ; X86-NEXT: vfmsubadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd1]
6669 ; X86-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) -/+ ymm2
6670 ; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
6671 ; X86-NEXT: retl # encoding: [0xc3]
6673 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256:
6675 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6676 ; X64-NEXT: vfmsubadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd1]
6677 ; X64-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) -/+ ymm2
6678 ; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
6679 ; X64-NEXT: retq # encoding: [0xc3]
6680 %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2)
6681 %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
6682 %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %2)
6683 %4 = shufflevector <4 x double> %1, <4 x double> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
6684 %5 = bitcast i8 %x3 to <8 x i1>
6685 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6686 %6 = select <4 x i1> %extract, <4 x double> %4, <4 x double> %x2
6690 define <4 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
6691 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128:
6693 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6694 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6695 ; X86-NEXT: vfmsubadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd1]
6696 ; X86-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) -/+ xmm2
6697 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
6698 ; X86-NEXT: retl # encoding: [0xc3]
6700 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128:
6702 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6703 ; X64-NEXT: vfmsubadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd1]
6704 ; X64-NEXT: # xmm2 {%k1} = (xmm0 * xmm1) -/+ xmm2
6705 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
6706 ; X64-NEXT: retq # encoding: [0xc3]
6707 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2)
6708 %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
6709 %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %2)
6710 %4 = shufflevector <4 x float> %1, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
6711 %5 = bitcast i8 %x3 to <8 x i1>
6712 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6713 %6 = select <4 x i1> %extract, <4 x float> %4, <4 x float> %x2
6717 define <8 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
6718 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256:
6720 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6721 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6722 ; X86-NEXT: vfmsubadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd1]
6723 ; X86-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) -/+ ymm2
6724 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
6725 ; X86-NEXT: retl # encoding: [0xc3]
6727 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256:
6729 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6730 ; X64-NEXT: vfmsubadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd1]
6731 ; X64-NEXT: # ymm2 {%k1} = (ymm0 * ymm1) -/+ ymm2
6732 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
6733 ; X64-NEXT: retq # encoding: [0xc3]
6734 %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2)
6735 %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
6736 %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %2)
6737 %4 = shufflevector <8 x float> %1, <8 x float> %3, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
6738 %5 = bitcast i8 %x3 to <8 x i1>
6739 %6 = select <8 x i1> %5, <8 x float> %4, <8 x float> %x2
6743 define <4 x float> @test_mask_vfmadd128_ps_rmk(<4 x float> %a0, <4 x float> %a1, ptr %ptr_a2, i8 %mask) {
6744 ; X86-LABEL: test_mask_vfmadd128_ps_rmk:
6746 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6747 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
6748 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
6749 ; X86-NEXT: vfmadd213ps (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x00]
6750 ; X86-NEXT: # xmm0 {%k1} = (xmm1 * xmm0) + mem
6751 ; X86-NEXT: retl # encoding: [0xc3]
6753 ; X64-LABEL: test_mask_vfmadd128_ps_rmk:
6755 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6756 ; X64-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
6757 ; X64-NEXT: # xmm0 {%k1} = (xmm1 * xmm0) + mem
6758 ; X64-NEXT: retq # encoding: [0xc3]
6759 %a2 = load <4 x float>, ptr %ptr_a2
6760 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
6761 %2 = bitcast i8 %mask to <8 x i1>
6762 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6763 %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %a0
6767 define <4 x float> @test_mask_vfmadd128_ps_rmka(<4 x float> %a0, <4 x float> %a1, ptr %ptr_a2, i8 %mask) {
6768 ; X86-LABEL: test_mask_vfmadd128_ps_rmka:
6770 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6771 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
6772 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
6773 ; X86-NEXT: vfmadd213ps (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x00]
6774 ; X86-NEXT: # xmm0 {%k1} = (xmm1 * xmm0) + mem
6775 ; X86-NEXT: retl # encoding: [0xc3]
6777 ; X64-LABEL: test_mask_vfmadd128_ps_rmka:
6779 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6780 ; X64-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
6781 ; X64-NEXT: # xmm0 {%k1} = (xmm1 * xmm0) + mem
6782 ; X64-NEXT: retq # encoding: [0xc3]
6783 %a2 = load <4 x float>, ptr %ptr_a2, align 8
6784 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
6785 %2 = bitcast i8 %mask to <8 x i1>
6786 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6787 %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %a0
6791 define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1, ptr %ptr_a2) {
6792 ; X86-LABEL: test_mask_vfmadd128_ps_rmkz:
6794 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6795 ; X86-NEXT: vfmadd213ps (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x00]
6796 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
6797 ; X86-NEXT: retl # encoding: [0xc3]
6799 ; X64-LABEL: test_mask_vfmadd128_ps_rmkz:
6801 ; X64-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x07]
6802 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
6803 ; X64-NEXT: retq # encoding: [0xc3]
6804 %a2 = load <4 x float>, ptr %ptr_a2
6805 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
6809 define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a1, ptr %ptr_a2) {
6810 ; X86-LABEL: test_mask_vfmadd128_ps_rmkza:
6812 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6813 ; X86-NEXT: vfmadd213ps (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x00]
6814 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
6815 ; X86-NEXT: retl # encoding: [0xc3]
6817 ; X64-LABEL: test_mask_vfmadd128_ps_rmkza:
6819 ; X64-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x07]
6820 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
6821 ; X64-NEXT: retq # encoding: [0xc3]
6822 %a2 = load <4 x float>, ptr %ptr_a2, align 4
6823 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
6827 define <4 x float> @test_mask_vfmadd128_ps_rmb(<4 x float> %a0, <4 x float> %a1, ptr %ptr_a2, i8 %mask) {
6828 ; X86-LABEL: test_mask_vfmadd128_ps_rmb:
6830 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6831 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
6832 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
6833 ; X86-NEXT: vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x00]
6834 ; X86-NEXT: # xmm0 {%k1} = (xmm1 * xmm0) + mem
6835 ; X86-NEXT: retl # encoding: [0xc3]
6837 ; X64-LABEL: test_mask_vfmadd128_ps_rmb:
6839 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6840 ; X64-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
6841 ; X64-NEXT: # xmm0 {%k1} = (xmm1 * xmm0) + mem
6842 ; X64-NEXT: retq # encoding: [0xc3]
6843 %q = load float, ptr %ptr_a2
6844 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
6845 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
6846 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
6847 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
6848 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i)
6849 %2 = bitcast i8 %mask to <8 x i1>
6850 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6851 %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %a0
6855 define <4 x float> @test_mask_vfmadd128_ps_rmba(<4 x float> %a0, <4 x float> %a1, ptr %ptr_a2, i8 %mask) {
6856 ; X86-LABEL: test_mask_vfmadd128_ps_rmba:
6858 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6859 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
6860 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
6861 ; X86-NEXT: vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x00]
6862 ; X86-NEXT: # xmm0 {%k1} = (xmm1 * xmm0) + mem
6863 ; X86-NEXT: retl # encoding: [0xc3]
6865 ; X64-LABEL: test_mask_vfmadd128_ps_rmba:
6867 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6868 ; X64-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
6869 ; X64-NEXT: # xmm0 {%k1} = (xmm1 * xmm0) + mem
6870 ; X64-NEXT: retq # encoding: [0xc3]
6871 %q = load float, ptr %ptr_a2, align 4
6872 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
6873 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
6874 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
6875 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
6876 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i)
6877 %2 = bitcast i8 %mask to <8 x i1>
6878 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6879 %3 = select <4 x i1> %extract, <4 x float> %1, <4 x float> %a0
6883 define <4 x float> @test_mask_vfmadd128_ps_rmbz(<4 x float> %a0, <4 x float> %a1, ptr %ptr_a2) {
6884 ; X86-LABEL: test_mask_vfmadd128_ps_rmbz:
6886 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6887 ; X86-NEXT: vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x00]
6888 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
6889 ; X86-NEXT: retl # encoding: [0xc3]
6891 ; X64-LABEL: test_mask_vfmadd128_ps_rmbz:
6893 ; X64-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
6894 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
6895 ; X64-NEXT: retq # encoding: [0xc3]
6896 %q = load float, ptr %ptr_a2
6897 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
6898 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
6899 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
6900 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
6901 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i)
6905 define <4 x float> @test_mask_vfmadd128_ps_rmbza(<4 x float> %a0, <4 x float> %a1, ptr %ptr_a2) {
6906 ; X86-LABEL: test_mask_vfmadd128_ps_rmbza:
6908 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6909 ; X86-NEXT: vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x00]
6910 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
6911 ; X86-NEXT: retl # encoding: [0xc3]
6913 ; X64-LABEL: test_mask_vfmadd128_ps_rmbza:
6915 ; X64-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
6916 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
6917 ; X64-NEXT: retq # encoding: [0xc3]
6918 %q = load float, ptr %ptr_a2, align 4
6919 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
6920 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
6921 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
6922 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
6923 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i)
6927 define <2 x double> @test_mask_vfmadd128_pd_rmk(<2 x double> %a0, <2 x double> %a1, ptr %ptr_a2, i8 %mask) {
6928 ; X86-LABEL: test_mask_vfmadd128_pd_rmk:
6930 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6931 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
6932 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
6933 ; X86-NEXT: vfmadd213pd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x00]
6934 ; X86-NEXT: # xmm0 {%k1} = (xmm1 * xmm0) + mem
6935 ; X86-NEXT: retl # encoding: [0xc3]
6937 ; X64-LABEL: test_mask_vfmadd128_pd_rmk:
6939 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6940 ; X64-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x07]
6941 ; X64-NEXT: # xmm0 {%k1} = (xmm1 * xmm0) + mem
6942 ; X64-NEXT: retq # encoding: [0xc3]
6943 %a2 = load <2 x double>, ptr %ptr_a2
6944 %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
6945 %2 = bitcast i8 %mask to <8 x i1>
6946 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
6947 %3 = select <2 x i1> %extract, <2 x double> %1, <2 x double> %a0
6951 define <2 x double> @test_mask_vfmadd128_pd_rmkz(<2 x double> %a0, <2 x double> %a1, ptr %ptr_a2) {
6952 ; X86-LABEL: test_mask_vfmadd128_pd_rmkz:
6954 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6955 ; X86-NEXT: vfmadd213pd (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0x00]
6956 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
6957 ; X86-NEXT: retl # encoding: [0xc3]
6959 ; X64-LABEL: test_mask_vfmadd128_pd_rmkz:
6961 ; X64-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0x07]
6962 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
6963 ; X64-NEXT: retq # encoding: [0xc3]
6964 %a2 = load <2 x double>, ptr %ptr_a2
6965 %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
6969 define <4 x double> @test_mask_vfmadd256_pd_rmk(<4 x double> %a0, <4 x double> %a1, ptr %ptr_a2, i8 %mask) {
6970 ; X86-LABEL: test_mask_vfmadd256_pd_rmk:
6972 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6973 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
6974 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
6975 ; X86-NEXT: vfmadd213pd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x00]
6976 ; X86-NEXT: # ymm0 {%k1} = (ymm1 * ymm0) + mem
6977 ; X86-NEXT: retl # encoding: [0xc3]
6979 ; X64-LABEL: test_mask_vfmadd256_pd_rmk:
6981 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6982 ; X64-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x07]
6983 ; X64-NEXT: # ymm0 {%k1} = (ymm1 * ymm0) + mem
6984 ; X64-NEXT: retq # encoding: [0xc3]
6985 %a2 = load <4 x double>, ptr %ptr_a2
6986 %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
6987 %2 = bitcast i8 %mask to <8 x i1>
6988 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6989 %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %a0
6993 define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double> %a1, ptr %ptr_a2) {
6994 ; X86-LABEL: test_mask_vfmadd256_pd_rmkz:
6996 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6997 ; X86-NEXT: vfmadd213pd (%eax), %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0x00]
6998 ; X86-NEXT: # ymm0 = (ymm1 * ymm0) + mem
6999 ; X86-NEXT: retl # encoding: [0xc3]
7001 ; X64-LABEL: test_mask_vfmadd256_pd_rmkz:
7003 ; X64-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0x07]
7004 ; X64-NEXT: # ymm0 = (ymm1 * ymm0) + mem
7005 ; X64-NEXT: retq # encoding: [0xc3]
7006 %a2 = load <4 x double>, ptr %ptr_a2
7007 %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
7011 declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
7012 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
7013 declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
7014 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
7015 declare <2 x double> @llvm.x86.avx512.mask.compress.v2f64(<2 x double>, <2 x double>, <2 x i1>)
7016 declare <4 x float> @llvm.x86.avx512.mask.compress.v4f32(<4 x float>, <4 x float>, <4 x i1>)
7017 declare <2 x i64> @llvm.x86.avx512.mask.compress.v2i64(<2 x i64>, <2 x i64>, <2 x i1>)
7018 declare <4 x i32> @llvm.x86.avx512.mask.compress.v4i32(<4 x i32>, <4 x i32>, <4 x i1>)
7019 declare <2 x double> @llvm.x86.avx512.mask.expand.v2f64(<2 x double>, <2 x double>, <2 x i1>)
7020 declare <4 x float> @llvm.x86.avx512.mask.expand.v4f32(<4 x float>, <4 x float>, <4 x i1>)
7021 declare <2 x i64> @llvm.x86.avx512.mask.expand.v2i64(<2 x i64>, <2 x i64>, <2 x i1>)
7022 declare <4 x i32> @llvm.x86.avx512.mask.expand.v4i32(<4 x i32>, <4 x i32>, <4 x i1>)
7023 declare <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double>, <4 x double>, <4 x i1>)
7024 declare <8 x float> @llvm.x86.avx512.mask.compress.v8f32(<8 x float>, <8 x float>, <8 x i1>)
7025 declare <4 x i64> @llvm.x86.avx512.mask.compress.v4i64(<4 x i64>, <4 x i64>, <4 x i1>)
7026 declare <8 x i32> @llvm.x86.avx512.mask.compress.v8i32(<8 x i32>, <8 x i32>, <8 x i1>)
7027 declare <4 x double> @llvm.x86.avx512.mask.expand.v4f64(<4 x double>, <4 x double>, <4 x i1>)
7028 declare <8 x float> @llvm.x86.avx512.mask.expand.v8f32(<8 x float>, <8 x float>, <8 x i1>)
7029 declare <4 x i64> @llvm.x86.avx512.mask.expand.v4i64(<4 x i64>, <4 x i64>, <4 x i1>)
7030 declare <8 x i32> @llvm.x86.avx512.mask.expand.v8i32(<8 x i32>, <8 x i32>, <8 x i1>)