1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64
5 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vldq-builtins.c
; Unmasked signed conversion: sitofp from <2 x i64> to <2 x double>.
; Both RUN lines share the CHECK prefix here and expect a single vcvtqq2pd on xmm0.
7 define <2 x double> @test_mm_cvtepi64_pd(<2 x i64> %__A) {
8 ; CHECK-LABEL: test_mm_cvtepi64_pd:
9 ; CHECK: # %bb.0: # %entry
10 ; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm0
11 ; CHECK-NEXT: ret{{[l|q]}}
13 %conv.i = sitofp <2 x i64> %__A to <2 x double>
14 ret <2 x double> %conv.i
; Merge-masked signed conversion on <2 x i64>: lanes whose bit in %__U is set
; take the sitofp result, the other lanes take the value from %__W.
; Expected asm is one {%k1}-masked vcvtqq2pd; the mask is loaded from the stack
; with kmovb on X86 and from %edi with kmovw on X64.
17 define <2 x double> @test_mm_mask_cvtepi64_pd(<2 x double> %__W, i8 zeroext %__U, <2 x i64> %__A) {
18 ; X86-LABEL: test_mm_mask_cvtepi64_pd:
19 ; X86: # %bb.0: # %entry
20 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
21 ; X86-NEXT: vcvtqq2pd %xmm1, %xmm0 {%k1}
24 ; X64-LABEL: test_mm_mask_cvtepi64_pd:
25 ; X64: # %bb.0: # %entry
26 ; X64-NEXT: kmovw %edi, %k1
27 ; X64-NEXT: vcvtqq2pd %xmm1, %xmm0 {%k1}
30 %conv.i.i = sitofp <2 x i64> %__A to <2 x double>
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0 and 1.
31 %0 = bitcast i8 %__U to <8 x i1>
32 %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
; Per-lane choice between the converted value and the pass-through %__W.
33 %1 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> %__W
; Zero-masked signed conversion on <2 x i64>: lanes whose bit in %__U is set
; take the sitofp result, the other lanes become zero.
; Expected asm is one vcvtqq2pd with {%k1} {z}.
37 define <2 x double> @test_mm_maskz_cvtepi64_pd(i8 zeroext %__U, <2 x i64> %__A) {
38 ; X86-LABEL: test_mm_maskz_cvtepi64_pd:
39 ; X86: # %bb.0: # %entry
40 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
41 ; X86-NEXT: vcvtqq2pd %xmm0, %xmm0 {%k1} {z}
44 ; X64-LABEL: test_mm_maskz_cvtepi64_pd:
45 ; X64: # %bb.0: # %entry
46 ; X64-NEXT: kmovw %edi, %k1
47 ; X64-NEXT: vcvtqq2pd %xmm0, %xmm0 {%k1} {z}
50 %conv.i.i = sitofp <2 x i64> %__A to <2 x double>
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0 and 1.
51 %0 = bitcast i8 %__U to <8 x i1>
52 %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
; Per-lane choice between the converted value and zero.
53 %1 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> zeroinitializer
; Unmasked signed conversion: sitofp from <4 x i64> to <4 x double>.
; Both RUN lines share the CHECK prefix here and expect a single vcvtqq2pd on ymm0.
57 define <4 x double> @test_mm256_cvtepi64_pd(<4 x i64> %__A) {
58 ; CHECK-LABEL: test_mm256_cvtepi64_pd:
59 ; CHECK: # %bb.0: # %entry
60 ; CHECK-NEXT: vcvtqq2pd %ymm0, %ymm0
61 ; CHECK-NEXT: ret{{[l|q]}}
63 %conv.i = sitofp <4 x i64> %__A to <4 x double>
64 ret <4 x double> %conv.i
; Merge-masked signed conversion on <4 x i64>: lanes whose bit in %__U is set
; take the sitofp result, the other lanes take the value from %__W.
; Expected asm is one {%k1}-masked vcvtqq2pd on ymm registers.
67 define <4 x double> @test_mm256_mask_cvtepi64_pd(<4 x double> %__W, i8 zeroext %__U, <4 x i64> %__A) {
68 ; X86-LABEL: test_mm256_mask_cvtepi64_pd:
69 ; X86: # %bb.0: # %entry
70 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
71 ; X86-NEXT: vcvtqq2pd %ymm1, %ymm0 {%k1}
74 ; X64-LABEL: test_mm256_mask_cvtepi64_pd:
75 ; X64: # %bb.0: # %entry
76 ; X64-NEXT: kmovw %edi, %k1
77 ; X64-NEXT: vcvtqq2pd %ymm1, %ymm0 {%k1}
80 %conv.i.i = sitofp <4 x i64> %__A to <4 x double>
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0 through 3.
81 %0 = bitcast i8 %__U to <8 x i1>
82 %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Per-lane choice between the converted value and the pass-through %__W.
83 %1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> %__W
; Zero-masked signed conversion on <4 x i64>: lanes whose bit in %__U is set
; take the sitofp result, the other lanes become zero.
; Expected asm is one vcvtqq2pd on ymm0 with {%k1} {z}.
87 define <4 x double> @test_mm256_maskz_cvtepi64_pd(i8 zeroext %__U, <4 x i64> %__A) {
88 ; X86-LABEL: test_mm256_maskz_cvtepi64_pd:
89 ; X86: # %bb.0: # %entry
90 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
91 ; X86-NEXT: vcvtqq2pd %ymm0, %ymm0 {%k1} {z}
94 ; X64-LABEL: test_mm256_maskz_cvtepi64_pd:
95 ; X64: # %bb.0: # %entry
96 ; X64-NEXT: kmovw %edi, %k1
97 ; X64-NEXT: vcvtqq2pd %ymm0, %ymm0 {%k1} {z}
100 %conv.i.i = sitofp <4 x i64> %__A to <4 x double>
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0 through 3.
101 %0 = bitcast i8 %__U to <8 x i1>
102 %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Per-lane choice between the converted value and zero.
103 %1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> zeroinitializer
; Unmasked unsigned conversion: uitofp from <2 x i64> to <2 x double>.
; Both RUN lines share the CHECK prefix here and expect a single vcvtuqq2pd on xmm0.
107 define <2 x double> @test_mm_cvtepu64_pd(<2 x i64> %__A) {
108 ; CHECK-LABEL: test_mm_cvtepu64_pd:
109 ; CHECK: # %bb.0: # %entry
110 ; CHECK-NEXT: vcvtuqq2pd %xmm0, %xmm0
111 ; CHECK-NEXT: ret{{[l|q]}}
113 %conv.i = uitofp <2 x i64> %__A to <2 x double>
114 ret <2 x double> %conv.i
; Merge-masked unsigned conversion on <2 x i64>: lanes whose bit in %__U is set
; take the uitofp result, the other lanes take the value from %__W.
; Expected asm is one {%k1}-masked vcvtuqq2pd.
117 define <2 x double> @test_mm_mask_cvtepu64_pd(<2 x double> %__W, i8 zeroext %__U, <2 x i64> %__A) {
118 ; X86-LABEL: test_mm_mask_cvtepu64_pd:
119 ; X86: # %bb.0: # %entry
120 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
121 ; X86-NEXT: vcvtuqq2pd %xmm1, %xmm0 {%k1}
124 ; X64-LABEL: test_mm_mask_cvtepu64_pd:
125 ; X64: # %bb.0: # %entry
126 ; X64-NEXT: kmovw %edi, %k1
127 ; X64-NEXT: vcvtuqq2pd %xmm1, %xmm0 {%k1}
130 %conv.i.i = uitofp <2 x i64> %__A to <2 x double>
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0 and 1.
131 %0 = bitcast i8 %__U to <8 x i1>
132 %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
; Per-lane choice between the converted value and the pass-through %__W.
133 %1 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> %__W
; Zero-masked unsigned conversion on <2 x i64>: lanes whose bit in %__U is set
; take the uitofp result, the other lanes become zero.
; Expected asm is one vcvtuqq2pd with {%k1} {z}.
137 define <2 x double> @test_mm_maskz_cvtepu64_pd(i8 zeroext %__U, <2 x i64> %__A) {
138 ; X86-LABEL: test_mm_maskz_cvtepu64_pd:
139 ; X86: # %bb.0: # %entry
140 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
141 ; X86-NEXT: vcvtuqq2pd %xmm0, %xmm0 {%k1} {z}
144 ; X64-LABEL: test_mm_maskz_cvtepu64_pd:
145 ; X64: # %bb.0: # %entry
146 ; X64-NEXT: kmovw %edi, %k1
147 ; X64-NEXT: vcvtuqq2pd %xmm0, %xmm0 {%k1} {z}
150 %conv.i.i = uitofp <2 x i64> %__A to <2 x double>
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0 and 1.
151 %0 = bitcast i8 %__U to <8 x i1>
152 %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
; Per-lane choice between the converted value and zero.
153 %1 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> zeroinitializer
; Unmasked unsigned conversion: uitofp from <4 x i64> to <4 x double>.
; Both RUN lines share the CHECK prefix here and expect a single vcvtuqq2pd on ymm0.
157 define <4 x double> @test_mm256_cvtepu64_pd(<4 x i64> %__A) {
158 ; CHECK-LABEL: test_mm256_cvtepu64_pd:
159 ; CHECK: # %bb.0: # %entry
160 ; CHECK-NEXT: vcvtuqq2pd %ymm0, %ymm0
161 ; CHECK-NEXT: ret{{[l|q]}}
163 %conv.i = uitofp <4 x i64> %__A to <4 x double>
164 ret <4 x double> %conv.i
; Merge-masked unsigned conversion on <4 x i64>: lanes whose bit in %__U is set
; take the uitofp result, the other lanes take the value from %__W.
; Expected asm is one {%k1}-masked vcvtuqq2pd on ymm registers.
167 define <4 x double> @test_mm256_mask_cvtepu64_pd(<4 x double> %__W, i8 zeroext %__U, <4 x i64> %__A) {
168 ; X86-LABEL: test_mm256_mask_cvtepu64_pd:
169 ; X86: # %bb.0: # %entry
170 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
171 ; X86-NEXT: vcvtuqq2pd %ymm1, %ymm0 {%k1}
174 ; X64-LABEL: test_mm256_mask_cvtepu64_pd:
175 ; X64: # %bb.0: # %entry
176 ; X64-NEXT: kmovw %edi, %k1
177 ; X64-NEXT: vcvtuqq2pd %ymm1, %ymm0 {%k1}
180 %conv.i.i = uitofp <4 x i64> %__A to <4 x double>
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0 through 3.
181 %0 = bitcast i8 %__U to <8 x i1>
182 %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Per-lane choice between the converted value and the pass-through %__W.
183 %1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> %__W
; Zero-masked unsigned conversion on <4 x i64>: lanes whose bit in %__U is set
; take the uitofp result, the other lanes become zero.
; Expected asm is one vcvtuqq2pd on ymm0 with {%k1} {z}.
187 define <4 x double> @test_mm256_maskz_cvtepu64_pd(i8 zeroext %__U, <4 x i64> %__A) {
188 ; X86-LABEL: test_mm256_maskz_cvtepu64_pd:
189 ; X86: # %bb.0: # %entry
190 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
191 ; X86-NEXT: vcvtuqq2pd %ymm0, %ymm0 {%k1} {z}
194 ; X64-LABEL: test_mm256_maskz_cvtepu64_pd:
195 ; X64: # %bb.0: # %entry
196 ; X64-NEXT: kmovw %edi, %k1
197 ; X64-NEXT: vcvtuqq2pd %ymm0, %ymm0 {%k1} {z}
200 %conv.i.i = uitofp <4 x i64> %__A to <4 x double>
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0 through 3.
201 %0 = bitcast i8 %__U to <8 x i1>
202 %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Per-lane choice between the converted value and zero.
203 %1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> zeroinitializer
; Masked fpclass on <2 x double> with immediate 2: the <2 x i1> intrinsic result
; is ANDed with the low two bits of %__U, widened to eight lanes and bitcast to i8.
; Expected asm folds the AND into a {%k1}-masked vfpclasspd, then moves k0 to eax.
207 define zeroext i8 @test_mm_mask_fpclass_pd_mask(i8 zeroext %__U, <2 x double> %__A) {
208 ; X86-LABEL: test_mm_mask_fpclass_pd_mask:
209 ; X86: # %bb.0: # %entry
210 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
211 ; X86-NEXT: vfpclasspd $2, %xmm0, %k0 {%k1}
212 ; X86-NEXT: kmovw %k0, %eax
213 ; X86-NEXT: # kill: def $al killed $al killed $eax
216 ; X64-LABEL: test_mm_mask_fpclass_pd_mask:
217 ; X64: # %bb.0: # %entry
218 ; X64-NEXT: kmovw %edi, %k1
219 ; X64-NEXT: vfpclasspd $2, %xmm0, %k0 {%k1}
220 ; X64-NEXT: kmovw %k0, %eax
221 ; X64-NEXT: # kill: def $al killed $al killed $eax
224 %0 = tail call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %__A, i32 2)
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0 and 1.
225 %1 = bitcast i8 %__U to <8 x i1>
226 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
227 %2 = and <2 x i1> %0, %extract
; Widen to eight lanes: indices 0-1 pick the result lanes, indices 2-3 pick
; lanes of the zeroinitializer operand, so lanes 2 through 7 are zero.
228 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
229 %4 = bitcast <8 x i1> %3 to i8
233 declare <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double>, i32)
; Unmasked fpclass on <2 x double> with immediate 2: the <2 x i1> intrinsic result
; is widened to eight lanes and bitcast to i8.
; Both RUN lines share the CHECK prefix and expect an unmasked vfpclasspd into k0.
235 define zeroext i8 @test_mm_fpclass_pd_mask(<2 x double> %__A) {
236 ; CHECK-LABEL: test_mm_fpclass_pd_mask:
237 ; CHECK: # %bb.0: # %entry
238 ; CHECK-NEXT: vfpclasspd $2, %xmm0, %k0 # k0 = isPositiveZero(xmm0)
239 ; CHECK-NEXT: kmovw %k0, %eax
240 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
241 ; CHECK-NEXT: ret{{[l|q]}}
243 %0 = tail call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %__A, i32 2)
; Widen to eight lanes: indices 0-1 pick the result lanes, indices 2-3 pick
; lanes of the zeroinitializer operand, so lanes 2 through 7 are zero.
244 %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
245 %2 = bitcast <8 x i1> %1 to i8
; Masked fpclass on <4 x double> with immediate 2: the <4 x i1> intrinsic result
; is ANDed with the low four bits of %__U, widened to eight lanes and bitcast to i8.
; Expected asm is a {%k1}-masked vfpclasspd on ymm0, with a vzeroupper after the mask is moved to eax.
249 define zeroext i8 @test_mm256_mask_fpclass_pd_mask(i8 zeroext %__U, <4 x double> %__A) {
250 ; X86-LABEL: test_mm256_mask_fpclass_pd_mask:
251 ; X86: # %bb.0: # %entry
252 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
253 ; X86-NEXT: vfpclasspd $2, %ymm0, %k0 {%k1}
254 ; X86-NEXT: kmovw %k0, %eax
255 ; X86-NEXT: # kill: def $al killed $al killed $eax
256 ; X86-NEXT: vzeroupper
259 ; X64-LABEL: test_mm256_mask_fpclass_pd_mask:
260 ; X64: # %bb.0: # %entry
261 ; X64-NEXT: kmovw %edi, %k1
262 ; X64-NEXT: vfpclasspd $2, %ymm0, %k0 {%k1}
263 ; X64-NEXT: kmovw %k0, %eax
264 ; X64-NEXT: # kill: def $al killed $al killed $eax
265 ; X64-NEXT: vzeroupper
268 %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %__A, i32 2)
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0 through 3.
269 %1 = bitcast i8 %__U to <8 x i1>
270 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
271 %2 = and <4 x i1> %0, %extract
; Widen to eight lanes: indices 0-3 pick the result lanes, indices 4-7 pick
; the zeroinitializer operand, so the upper four lanes are zero.
272 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
273 %4 = bitcast <8 x i1> %3 to i8
277 declare <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double>, i32)
; Unmasked fpclass on <4 x double> with immediate 2: the <4 x i1> intrinsic result
; is widened to eight lanes and bitcast to i8.
; Both RUN lines share the CHECK prefix and expect an unmasked vfpclasspd on ymm0
; followed by vzeroupper before the return.
279 define zeroext i8 @test_mm256_fpclass_pd_mask(<4 x double> %__A) {
280 ; CHECK-LABEL: test_mm256_fpclass_pd_mask:
281 ; CHECK: # %bb.0: # %entry
282 ; CHECK-NEXT: vfpclasspd $2, %ymm0, %k0 # k0 = isPositiveZero(ymm0)
283 ; CHECK-NEXT: kmovw %k0, %eax
284 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
285 ; CHECK-NEXT: vzeroupper
286 ; CHECK-NEXT: ret{{[l|q]}}
288 %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %__A, i32 2)
; Widen to eight lanes: indices 0-3 pick the result lanes, indices 4-7 pick
; the zeroinitializer operand, so the upper four lanes are zero.
289 %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
290 %2 = bitcast <8 x i1> %1 to i8
; Masked fpclass on <4 x float> with immediate 2: the <4 x i1> intrinsic result
; is ANDed with the low four bits of %__U, widened to eight lanes and bitcast to i8.
; Expected asm folds the AND into a {%k1}-masked vfpclassps on xmm0.
294 define zeroext i8 @test_mm_mask_fpclass_ps_mask(i8 zeroext %__U, <4 x float> %__A) {
295 ; X86-LABEL: test_mm_mask_fpclass_ps_mask:
296 ; X86: # %bb.0: # %entry
297 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
298 ; X86-NEXT: vfpclassps $2, %xmm0, %k0 {%k1}
299 ; X86-NEXT: kmovw %k0, %eax
300 ; X86-NEXT: # kill: def $al killed $al killed $eax
303 ; X64-LABEL: test_mm_mask_fpclass_ps_mask:
304 ; X64: # %bb.0: # %entry
305 ; X64-NEXT: kmovw %edi, %k1
306 ; X64-NEXT: vfpclassps $2, %xmm0, %k0 {%k1}
307 ; X64-NEXT: kmovw %k0, %eax
308 ; X64-NEXT: # kill: def $al killed $al killed $eax
311 %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %__A, i32 2)
; Reinterpret the i8 mask as eight i1 lanes and keep only lanes 0 through 3.
312 %1 = bitcast i8 %__U to <8 x i1>
313 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
314 %2 = and <4 x i1> %0, %extract
; Widen to eight lanes: indices 0-3 pick the result lanes, indices 4-7 pick
; the zeroinitializer operand, so the upper four lanes are zero.
315 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
316 %4 = bitcast <8 x i1> %3 to i8
320 declare <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float>, i32)
; Unmasked fpclass on <4 x float> with immediate 2: the <4 x i1> intrinsic result
; is widened to eight lanes and bitcast to i8.
; Both RUN lines share the CHECK prefix and expect an unmasked vfpclassps on xmm0.
322 define zeroext i8 @test_mm_fpclass_ps_mask(<4 x float> %__A) {
323 ; CHECK-LABEL: test_mm_fpclass_ps_mask:
324 ; CHECK: # %bb.0: # %entry
325 ; CHECK-NEXT: vfpclassps $2, %xmm0, %k0 # k0 = isPositiveZero(xmm0)
326 ; CHECK-NEXT: kmovw %k0, %eax
327 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
328 ; CHECK-NEXT: ret{{[l|q]}}
330 %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %__A, i32 2)
; Widen to eight lanes: indices 0-3 pick the result lanes, indices 4-7 pick
; the zeroinitializer operand, so the upper four lanes are zero.
331 %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
332 %2 = bitcast <8 x i1> %1 to i8
; Masked fpclass on <8 x float> with immediate 2: the <8 x i1> intrinsic result
; is ANDed with all eight bits of %__U and bitcast to i8 (no lane extraction or
; widening is needed at this width).
; The expected asm does not use a {%k1} write-mask here: it checks an unmasked
; vfpclassps followed by an andb with the mask (from the stack on X86, %dil on X64).
336 define zeroext i8 @test_mm256_mask_fpclass_ps_mask(i8 zeroext %__U, <8 x float> %__A) {
337 ; X86-LABEL: test_mm256_mask_fpclass_ps_mask:
338 ; X86: # %bb.0: # %entry
339 ; X86-NEXT: vfpclassps $2, %ymm0, %k0 # k0 = isPositiveZero(ymm0)
340 ; X86-NEXT: kmovw %k0, %eax
341 ; X86-NEXT: andb {{[0-9]+}}(%esp), %al
342 ; X86-NEXT: # kill: def $al killed $al killed $eax
343 ; X86-NEXT: vzeroupper
346 ; X64-LABEL: test_mm256_mask_fpclass_ps_mask:
347 ; X64: # %bb.0: # %entry
348 ; X64-NEXT: vfpclassps $2, %ymm0, %k0 # k0 = isPositiveZero(ymm0)
349 ; X64-NEXT: kmovw %k0, %eax
350 ; X64-NEXT: andb %dil, %al
351 ; X64-NEXT: # kill: def $al killed $al killed $eax
352 ; X64-NEXT: vzeroupper
355 %0 = tail call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %__A, i32 2)
356 %1 = bitcast i8 %__U to <8 x i1>
357 %2 = and <8 x i1> %0, %1
358 %3 = bitcast <8 x i1> %2 to i8
362 declare <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float>, i32)
364 define zeroext i8 @test_mm256_fpclass_ps_mask(<8 x float> %__A) {
365 ; CHECK-LABEL: test_mm256_fpclass_ps_mask:
366 ; CHECK: # %bb.0: # %entry
367 ; CHECK-NEXT: vfpclassps $2, %ymm0, %k0 # k0 = isPositiveZero(ymm0)
368 ; CHECK-NEXT: kmovw %k0, %eax
369 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
370 ; CHECK-NEXT: vzeroupper
371 ; CHECK-NEXT: ret{{[l|q]}}
373 %0 = tail call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %__A, i32 2)
374 %1 = bitcast <8 x i1> %0 to i8