; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
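; These tests exercise the AVX512DQ/VL 128-bit and 256-bit intrinsics for
; fp<->i64 vector conversions (vcvt[t]pd2[u]qq, vcvt[t]ps2[u]qq, vcvt[u]qq2ps)
; plus vreduce, vrange and vfpclass, checking masked and unmasked instruction
; selection and encodings on both i686 and x86_64.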
declare <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7b,0xc8]
; X86-NEXT:    vcvtpd2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x7b,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7b,0xc8]
; X64-NEXT:    vcvtpd2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x7b,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7b,0xc8]
; X86-NEXT:    vcvtpd2qq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x7b,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7b,0xc8]
; X64-NEXT:    vcvtpd2qq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x7b,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x79,0xc8]
; X86-NEXT:    vcvtpd2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x79,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x79,0xc8]
; X64-NEXT:    vcvtpd2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x79,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x79,0xc8]
; X86-NEXT:    vcvtpd2uqq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x79,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x79,0xc8]
; X64-NEXT:    vcvtpd2uqq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x79,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}
declare <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc8]
; X86-NEXT:    vcvtps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc8]
; X64-NEXT:    vcvtps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7b,0xc8]
; X86-NEXT:    vcvtps2qq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x7b,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7b,0xc8]
; X64-NEXT:    vcvtps2qq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x7b,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc8]
; X86-NEXT:    vcvtps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc8]
; X64-NEXT:    vcvtps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x79,0xc8]
; X86-NEXT:    vcvtps2uqq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x79,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x79,0xc8]
; X64-NEXT:    vcvtps2uqq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x79,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}
declare <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
; X86-NEXT:    vcvtqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
; X64-NEXT:    vcvtqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_128_zext(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128_zext:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
; X86-NEXT:    vcvtqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128_zext:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
; X64-NEXT:    vcvtqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
  %res1 = shufflevector <4 x float> %res, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %res2 = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
  %res3 = shufflevector <4 x float> %res2, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %res4 = fadd <4 x float> %res1, %res3
  ret <4 x float> %res4
}
define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256:
; X86:       # %bb.0:
; X86-NEXT:    vcvtqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xd0]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8]
; X86-NEXT:    vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256:
; X64:       # %bb.0:
; X64-NEXT:    vcvtqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8]
; X64-NEXT:    vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %cvt1 = sitofp <4 x i64> %x0 to <4 x float>
  %1 = bitcast i8 %x2 to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract, <4 x float> %cvt1, <4 x float> %x1
  %cvt = sitofp <4 x i64> %x0 to <4 x float>
  %res2 = fadd <4 x float> %2, %cvt
  ret <4 x float> %res2
}
declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7a,0xc8]
; X86-NEXT:    vcvttpd2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x7a,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7a,0xc8]
; X64-NEXT:    vcvttpd2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x7a,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7a,0xc8]
; X86-NEXT:    vcvttpd2qq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x7a,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7a,0xc8]
; X64-NEXT:    vcvttpd2qq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x7a,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x78,0xc8]
; X86-NEXT:    vcvttpd2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x78,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x78,0xc8]
; X64-NEXT:    vcvttpd2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x78,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x78,0xc8]
; X86-NEXT:    vcvttpd2uqq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x78,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x78,0xc8]
; X64-NEXT:    vcvttpd2uqq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x78,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}
declare <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc8]
; X86-NEXT:    vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc8]
; X64-NEXT:    vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7a,0xc8]
; X86-NEXT:    vcvttps2qq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x7a,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7a,0xc8]
; X64-NEXT:    vcvttps2qq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x7a,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
; X86-NEXT:    vcvtuqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
; X64-NEXT:    vcvtuqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_128_zext(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128_zext:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
; X86-NEXT:    vcvtuqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128_zext:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
; X64-NEXT:    vcvtuqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
  %res1 = shufflevector <4 x float> %res, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %res2 = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
  %res3 = shufflevector <4 x float> %res2, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %res4 = fadd <4 x float> %res1, %res3
  ret <4 x float> %res4
}
declare <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
; X86:       # %bb.0:
; X86-NEXT:    vcvtuqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xff,0x28,0x7a,0xd0]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtuqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8]
; X86-NEXT:    vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
; X64:       # %bb.0:
; X64-NEXT:    vcvtuqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xff,0x28,0x7a,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtuqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8]
; X64-NEXT:    vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %cvt1 = uitofp <4 x i64> %x0 to <4 x float>
  %1 = bitcast i8 %x2 to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract, <4 x float> %cvt1, <4 x float> %x1
  %cvt = uitofp <4 x i64> %x0 to <4 x float>
  %res2 = fadd <4 x float> %2, %cvt
  ret <4 x float> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc8]
; X86-NEXT:    vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc8]
; X64-NEXT:    vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x78,0xc8]
; X86-NEXT:    vcvttps2uqq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x78,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x78,0xc8]
; X64-NEXT:    vcvttps2uqq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x78,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}
declare <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double>, i32, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_reduce_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_reduce_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vreducepd $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x56,0xc8,0x04]
; X86-NEXT:    vreducepd $8, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x56,0xc0,0x08]
; X86-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_reduce_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vreducepd $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x56,0xc8,0x04]
; X64-NEXT:    vreducepd $8, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x56,0xc0,0x08]
; X64-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %x0, i32 8, <2 x double> %x2, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_reduce_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_reduce_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vreducepd $4, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x56,0xc8,0x04]
; X86-NEXT:    vreducepd $0, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x56,0xc0,0x00]
; X86-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_reduce_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vreducepd $4, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x56,0xc8,0x04]
; X64-NEXT:    vreducepd $0, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x56,0xc0,0x00]
; X64-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %x0, i32 0, <4 x double> %x2, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float>, i32, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_reduce_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_reduce_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vreduceps $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x56,0xc8,0x04]
; X86-NEXT:    vreduceps $88, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x56,0xc0,0x58]
; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_reduce_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vreduceps $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x56,0xc8,0x04]
; X64-NEXT:    vreduceps $88, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x56,0xc0,0x58]
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_reduce_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_reduce_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vreduceps $11, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x56,0xc8,0x0b]
; X86-NEXT:    vreduceps $11, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x56,0xc0,0x0b]
; X86-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_reduce_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vreduceps $11, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x56,0xc8,0x0b]
; X64-NEXT:    vreduceps $11, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x56,0xc0,0x0b]
; X64-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}
declare <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_range_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_range_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vrangepd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x50,0xd1,0x04]
; X86-NEXT:    vrangepd $8, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x50,0xc1,0x08]
; X86-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_range_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vrangepd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x50,0xd1,0x04]
; X64-NEXT:    vrangepd $8, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x50,0xc1,0x08]
; X64-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %x0, <2 x double> %x1, i32 4, <2 x double> %x3, i8 %x4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %x0, <2 x double> %x1, i32 8, <2 x double> %x3, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_range_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_range_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vrangepd $4, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x50,0xd1,0x04]
; X86-NEXT:    vrangepd $88, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x50,0xc1,0x58]
; X86-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_range_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vrangepd $4, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x50,0xd1,0x04]
; X64-NEXT:    vrangepd $88, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x50,0xc1,0x58]
; X64-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %x0, <4 x double> %x1, i32 4, <4 x double> %x3, i8 %x4)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %x0, <4 x double> %x1, i32 88, <4 x double> %x3, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_range_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_range_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vrangeps $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x50,0xd1,0x04]
; X86-NEXT:    vrangeps $88, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x50,0xc1,0x58]
; X86-NEXT:    vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_range_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vrangeps $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x50,0xd1,0x04]
; X64-NEXT:    vrangeps $88, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x50,0xc1,0x58]
; X64-NEXT:    vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %x0, <4 x float> %x1, i32 4, <4 x float> %x3, i8 %x4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %x0, <4 x float> %x1, i32 88, <4 x float> %x3, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_range_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_range_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vrangeps $4, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x50,0xd1,0x04]
; X86-NEXT:    vrangeps $88, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x50,0xc1,0x58]
; X86-NEXT:    vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_range_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vrangeps $4, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x50,0xd1,0x04]
; X64-NEXT:    vrangeps $88, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x50,0xc1,0x58]
; X64-NEXT:    vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %x0, <8 x float> %x1, i32 4, <8 x float> %x3, i8 %x4)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %x0, <8 x float> %x1, i32 88, <8 x float> %x3, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}
declare <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float>, i32)

define i8 @test_int_x86_avx512_fpclass_ps_128(<4 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclassps $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc8,0x04]
; CHECK-NEXT:    vfpclassps $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %x0, i32 2)
  %res1 = call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %x0, i32 4)
  %1 = and <4 x i1> %res1, %res
  %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %3 = bitcast <8 x i1> %2 to i8
  ret i8 %3
}

declare <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float>, i32)

define i8 @test_int_x86_avx512_fpclass_ps_256(<8 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclassps $4, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc8,0x04]
; CHECK-NEXT:    vfpclassps $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %x0, i32 2)
  %res1 = call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %x0, i32 4)
  %1 = and <8 x i1> %res1, %res
  %2 = bitcast <8 x i1> %1 to i8
  ret i8 %2
}

declare <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double>, i32)

define i8 @test_int_x86_avx512_fpclass_pd_128(<2 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclasspd $2, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc8,0x02]
; CHECK-NEXT:    vfpclasspd $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %x0, i32 4)
  %res1 = call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %x0, i32 2)
  %1 = and <2 x i1> %res1, %res
  %2 = shufflevector <2 x i1> %1, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %3 = bitcast <8 x i1> %2 to i8
  ret i8 %3
}

declare <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double>, i32)

define i8 @test_int_x86_avx512_fpclass_pd_256(<4 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclasspd $4, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc8,0x04]
; CHECK-NEXT:    vfpclasspd $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %x0, i32 2)
  %res1 = call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %x0, i32 4)
  %1 = and <4 x i1> %res1, %res
  %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %3 = bitcast <8 x i1> %2 to i8
  ret i8 %3
}