1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
5 declare <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double>, <2 x i64>, i8)
; Unmasked 128-bit pd2qq: the intrinsic is called with an all-ones mask (i8 -1).
; Both runs expect a plain vcvtpd2qq %xmm0, %xmm0 with no mask register.
7 define <2 x i64>@test_int_x86_avx512_cvt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1) {
8 ; CHECK-LABEL: test_int_x86_avx512_cvt_pd2qq_128:
10 ; CHECK-NEXT: vcvtpd2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x7b,0xc0]
11 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
12 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
; Merge-masked 128-bit pd2qq with a variable mask %x2. Expected sequence: the
; mask is moved into %k1 (kmovb from the stack on the i686 run, kmovw from %edi
; on the x86_64 run), a {%k1}-masked vcvtpd2qq writes %xmm1, and vmovaps copies
; the result into %xmm0.
16 define <2 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
17 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_128:
19 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
20 ; X86-NEXT: vcvtpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7b,0xc8]
21 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
22 ; X86-NEXT: retl # encoding: [0xc3]
24 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_128:
26 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
27 ; X64-NEXT: vcvtpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7b,0xc8]
28 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
29 ; X64-NEXT: retq # encoding: [0xc3]
30 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
34 declare <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double>, <4 x i64>, i8)
; Unmasked 256-bit pd2qq: all-ones mask (i8 -1). Both runs expect a plain
; vcvtpd2qq %ymm0, %ymm0.
36 define <4 x i64>@test_int_x86_avx512_cvt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1) {
37 ; CHECK-LABEL: test_int_x86_avx512_cvt_pd2qq_256:
39 ; CHECK-NEXT: vcvtpd2qq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x7b,0xc0]
40 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
41 %res = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
; Merge-masked 256-bit pd2qq with a variable mask %x2. Expected sequence: mask
; into %k1 (stack load on i686, %edi on x86_64), {%k1}-masked vcvtpd2qq writing
; %ymm1, then a vmovaps copy into %ymm0.
45 define <4 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
46 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_256:
48 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
49 ; X86-NEXT: vcvtpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7b,0xc8]
50 ; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
51 ; X86-NEXT: retl # encoding: [0xc3]
53 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_256:
55 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
56 ; X64-NEXT: vcvtpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7b,0xc8]
57 ; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
58 ; X64-NEXT: retq # encoding: [0xc3]
59 %res = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
63 declare <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double>, <2 x i64>, i8)
; Unmasked 128-bit pd2uqq: all-ones mask (i8 -1). Both runs expect a plain
; vcvtpd2uqq %xmm0, %xmm0.
65 define <2 x i64>@test_int_x86_avx512_cvt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1) {
66 ; CHECK-LABEL: test_int_x86_avx512_cvt_pd2uqq_128:
68 ; CHECK-NEXT: vcvtpd2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x79,0xc0]
69 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
70 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
; Merge-masked 128-bit pd2uqq with a variable mask %x2. Expected sequence: mask
; into %k1 (stack load on i686, %edi on x86_64), {%k1}-masked vcvtpd2uqq writing
; %xmm1, then a vmovaps copy into %xmm0.
74 define <2 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
75 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_128:
77 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
78 ; X86-NEXT: vcvtpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x79,0xc8]
79 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
80 ; X86-NEXT: retl # encoding: [0xc3]
82 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_128:
84 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
85 ; X64-NEXT: vcvtpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x79,0xc8]
86 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
87 ; X64-NEXT: retq # encoding: [0xc3]
88 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
92 declare <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double>, <4 x i64>, i8)
; Unmasked 256-bit pd2uqq: all-ones mask (i8 -1). Both runs expect a plain
; vcvtpd2uqq %ymm0, %ymm0.
94 define <4 x i64>@test_int_x86_avx512_cvt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1) {
95 ; CHECK-LABEL: test_int_x86_avx512_cvt_pd2uqq_256:
97 ; CHECK-NEXT: vcvtpd2uqq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x79,0xc0]
98 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
99 %res = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
; Merge-masked 256-bit pd2uqq with a variable mask %x2. Expected sequence: mask
; into %k1 (stack load on i686, %edi on x86_64), {%k1}-masked vcvtpd2uqq writing
; %ymm1, then a vmovaps copy into %ymm0.
103 define <4 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
104 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_256:
106 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
107 ; X86-NEXT: vcvtpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x79,0xc8]
108 ; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
109 ; X86-NEXT: retl # encoding: [0xc3]
111 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_256:
113 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
114 ; X64-NEXT: vcvtpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x79,0xc8]
115 ; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
116 ; X64-NEXT: retq # encoding: [0xc3]
117 %res = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
121 declare <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float>, <2 x i64>, i8)
; Unmasked 128-bit ps2qq on a register operand: all-ones mask (i8 -1). Both
; runs expect a plain vcvtps2qq %xmm0, %xmm0.
123 define <2 x i64>@test_int_x86_avx512_cvt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1) {
124 ; CHECK-LABEL: test_int_x86_avx512_cvt_ps2qq_128:
126 ; CHECK-NEXT: vcvtps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0]
127 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
128 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
; Merge-masked 128-bit ps2qq on a register operand with a variable mask %x2.
; Expected sequence: mask into %k1 (stack load on i686, %edi on x86_64),
; {%k1}-masked vcvtps2qq writing %xmm1, then a vmovaps copy into %xmm0.
132 define <2 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
133 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128:
135 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
136 ; X86-NEXT: vcvtps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc8]
137 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
138 ; X86-NEXT: retl # encoding: [0xc3]
140 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128:
142 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
143 ; X64-NEXT: vcvtps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc8]
144 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
145 ; X64-NEXT: retq # encoding: [0xc3]
146 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
; Load folding, unmasked: a <2 x float> load is widened to <4 x float> by a
; shufflevector whose upper two elements come from zeroinitializer, then
; converted with an all-ones mask. The expected code uses the memory-operand
; form of vcvtps2qq directly on the pointer (no separate vector load).
150 define <2 x i64> @test_int_x86_avx512_cvt_ps2qq_128_load(<2 x float>* %p) {
151 ; X86-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load:
153 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
154 ; X86-NEXT: vcvtps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x00]
155 ; X86-NEXT: retl # encoding: [0xc3]
157 ; X64-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load:
159 ; X64-NEXT: vcvtps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x07]
160 ; X64-NEXT: retq # encoding: [0xc3]
161 %x0 = load <2 x float>, <2 x float>* %p
162 %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
163 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1)
; Load folding, merge-masked: same zero-padded <2 x float> load as the unmasked
; variant, but with passthru %passthru and variable mask %mask. The expected
; code is a {%k1}-masked memory-operand vcvtps2qq writing %xmm0 directly.
167 define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2qq_128_load(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) {
168 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load:
170 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
171 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
172 ; X86-NEXT: vcvtps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x00]
173 ; X86-NEXT: retl # encoding: [0xc3]
175 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load:
177 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
178 ; X64-NEXT: vcvtps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x07]
179 ; X64-NEXT: retq # encoding: [0xc3]
180 %x0 = load <2 x float>, <2 x float>* %p
181 %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
182 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask)
; Load folding, zero-masked: zero-padded <2 x float> load with a
; zeroinitializer passthru and variable mask %mask. The expected code is a
; memory-operand vcvtps2qq with {%k1} {z}.
186 define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2qq_128_load(<2 x float>* %p, i8 %mask) {
187 ; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load:
189 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
190 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
191 ; X86-NEXT: vcvtps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x00]
192 ; X86-NEXT: retl # encoding: [0xc3]
194 ; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load:
196 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
197 ; X64-NEXT: vcvtps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x07]
198 ; X64-NEXT: retq # encoding: [0xc3]
199 %x0 = load <2 x float>, <2 x float>* %p
200 %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
201 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask)
; Load folding variant 2, unmasked: like the _load test, but the shufflevector
; pads the <2 x float> load with undef instead of zeroinitializer. The expected
; code is still the memory-operand form of vcvtps2qq.
206 define <2 x i64> @test_int_x86_avx512_cvt_ps2qq_128_load_2(<2 x float>* %p) {
207 ; X86-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load_2:
209 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
210 ; X86-NEXT: vcvtps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x00]
211 ; X86-NEXT: retl # encoding: [0xc3]
213 ; X64-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load_2:
215 ; X64-NEXT: vcvtps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x07]
216 ; X64-NEXT: retq # encoding: [0xc3]
217 %x0 = load <2 x float>, <2 x float>* %p
218 %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
219 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1)
; Load folding variant 2, merge-masked: undef-padded <2 x float> load with
; passthru %passthru and variable mask %mask. The expected code is a
; {%k1}-masked memory-operand vcvtps2qq writing %xmm0.
223 define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2qq_128_load_2(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) {
224 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load_2:
226 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
227 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
228 ; X86-NEXT: vcvtps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x00]
229 ; X86-NEXT: retl # encoding: [0xc3]
231 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load_2:
233 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
234 ; X64-NEXT: vcvtps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x07]
235 ; X64-NEXT: retq # encoding: [0xc3]
236 %x0 = load <2 x float>, <2 x float>* %p
237 %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
238 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask)
; Load folding variant 2, zero-masked: undef-padded <2 x float> load with a
; zeroinitializer passthru and variable mask %mask. The expected code is a
; memory-operand vcvtps2qq with {%k1} {z}.
242 define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2qq_128_load_2(<2 x float>* %p, i8 %mask) {
243 ; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load_2:
245 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
246 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
247 ; X86-NEXT: vcvtps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x00]
248 ; X86-NEXT: retl # encoding: [0xc3]
250 ; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load_2:
252 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
253 ; X64-NEXT: vcvtps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x07]
254 ; X64-NEXT: retq # encoding: [0xc3]
255 %x0 = load <2 x float>, <2 x float>* %p
256 %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
257 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask)
; Load folding variant 3, unmasked: a full <4 x float> load feeds the intrinsic
; directly (no shufflevector). The expected code is the memory-operand form of
; vcvtps2qq.
261 define <2 x i64> @test_int_x86_avx512_cvt_ps2qq_128_load_3(<4 x float>* %p) {
262 ; X86-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load_3:
264 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
265 ; X86-NEXT: vcvtps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x00]
266 ; X86-NEXT: retl # encoding: [0xc3]
268 ; X64-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load_3:
270 ; X64-NEXT: vcvtps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x07]
271 ; X64-NEXT: retq # encoding: [0xc3]
272 %x0 = load <4 x float>, <4 x float>* %p
273 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> undef, i8 -1)
; Load folding variant 3, merge-masked: full <4 x float> load with passthru
; %passthru and variable mask %mask. The expected code is a {%k1}-masked
; memory-operand vcvtps2qq writing %xmm0.
277 define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2qq_128_load_3(<4 x float>* %p, <2 x i64> %passthru, i8 %mask) {
278 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load_3:
280 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
281 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
282 ; X86-NEXT: vcvtps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x00]
283 ; X86-NEXT: retl # encoding: [0xc3]
285 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load_3:
287 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
288 ; X64-NEXT: vcvtps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x07]
289 ; X64-NEXT: retq # encoding: [0xc3]
290 %x0 = load <4 x float>, <4 x float>* %p
291 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> %passthru, i8 %mask)
; Load folding variant 3, zero-masked: full <4 x float> load with a
; zeroinitializer passthru and variable mask %mask. The expected code is a
; memory-operand vcvtps2qq with {%k1} {z}.
295 define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2qq_128_load_3(<4 x float>* %p, i8 %mask) {
296 ; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load_3:
298 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
299 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
300 ; X86-NEXT: vcvtps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x00]
301 ; X86-NEXT: retl # encoding: [0xc3]
303 ; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load_3:
305 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
306 ; X64-NEXT: vcvtps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x07]
307 ; X64-NEXT: retq # encoding: [0xc3]
308 %x0 = load <4 x float>, <4 x float>* %p
309 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask)
313 declare <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float>, <4 x i64>, i8)
; Unmasked 256-bit ps2qq: <4 x float> in, <4 x i64> out, all-ones mask (i8 -1).
; Both runs expect vcvtps2qq with an xmm source and a ymm destination.
315 define <4 x i64>@test_int_x86_avx512_cvt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1) {
316 ; CHECK-LABEL: test_int_x86_avx512_cvt_ps2qq_256:
318 ; CHECK-NEXT: vcvtps2qq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x7b,0xc0]
319 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
320 %res = call <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
; Merge-masked 256-bit ps2qq with a variable mask %x2. Expected sequence: mask
; into %k1 (stack load on i686, %edi on x86_64), {%k1}-masked vcvtps2qq from
; %xmm0 into %ymm1, then a vmovaps copy into %ymm0.
324 define <4 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
325 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_256:
327 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
328 ; X86-NEXT: vcvtps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7b,0xc8]
329 ; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
330 ; X86-NEXT: retl # encoding: [0xc3]
332 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_256:
334 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
335 ; X64-NEXT: vcvtps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7b,0xc8]
336 ; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
337 ; X64-NEXT: retq # encoding: [0xc3]
338 %res = call <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
342 declare <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float>, <2 x i64>, i8)
; Unmasked 128-bit ps2uqq on a register operand: all-ones mask (i8 -1). Both
; runs expect a plain vcvtps2uqq %xmm0, %xmm0.
344 define <2 x i64>@test_int_x86_avx512_cvt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1) {
345 ; CHECK-LABEL: test_int_x86_avx512_cvt_ps2uqq_128:
347 ; CHECK-NEXT: vcvtps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0]
348 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
349 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
; Merge-masked 128-bit ps2uqq on a register operand with a variable mask %x2.
; Expected sequence: mask into %k1 (stack load on i686, %edi on x86_64),
; {%k1}-masked vcvtps2uqq writing %xmm1, then a vmovaps copy into %xmm0.
353 define <2 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
354 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128:
356 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
357 ; X86-NEXT: vcvtps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc8]
358 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
359 ; X86-NEXT: retl # encoding: [0xc3]
361 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128:
363 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
364 ; X64-NEXT: vcvtps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc8]
365 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
366 ; X64-NEXT: retq # encoding: [0xc3]
367 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
; Load folding, unmasked: a <2 x float> load is widened to <4 x float> with
; zeroinitializer upper elements, then converted with an all-ones mask. The
; expected code is the memory-operand form of vcvtps2uqq.
371 define <2 x i64> @test_int_x86_avx512_cvt_ps2uqq_128_load(<2 x float>* %p) {
372 ; X86-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load:
374 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
375 ; X86-NEXT: vcvtps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x00]
376 ; X86-NEXT: retl # encoding: [0xc3]
378 ; X64-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load:
380 ; X64-NEXT: vcvtps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x07]
381 ; X64-NEXT: retq # encoding: [0xc3]
382 %x0 = load <2 x float>, <2 x float>* %p
383 %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
384 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1)
; Load folding, merge-masked: zero-padded <2 x float> load with passthru
; %passthru and variable mask %mask. The expected code is a {%k1}-masked
; memory-operand vcvtps2uqq writing %xmm0.
388 define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2uqq_128_load(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) {
389 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load:
391 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
392 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
393 ; X86-NEXT: vcvtps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x00]
394 ; X86-NEXT: retl # encoding: [0xc3]
396 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load:
398 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
399 ; X64-NEXT: vcvtps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x07]
400 ; X64-NEXT: retq # encoding: [0xc3]
401 %x0 = load <2 x float>, <2 x float>* %p
402 %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
403 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask)
; Load folding, zero-masked: zero-padded <2 x float> load with a
; zeroinitializer passthru and variable mask %mask. The expected code is a
; memory-operand vcvtps2uqq with {%k1} {z}.
407 define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2uqq_128_load(<2 x float>* %p, i8 %mask) {
408 ; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load:
410 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
411 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
412 ; X86-NEXT: vcvtps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x00]
413 ; X86-NEXT: retl # encoding: [0xc3]
415 ; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load:
417 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
418 ; X64-NEXT: vcvtps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x07]
419 ; X64-NEXT: retq # encoding: [0xc3]
420 %x0 = load <2 x float>, <2 x float>* %p
421 %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
422 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask)
; Load folding variant 2, unmasked: the <2 x float> load is padded with undef
; rather than zeroinitializer. The expected code is the memory-operand form of
; vcvtps2uqq.
426 define <2 x i64> @test_int_x86_avx512_cvt_ps2uqq_128_load_2(<2 x float>* %p) {
427 ; X86-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load_2:
429 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
430 ; X86-NEXT: vcvtps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x00]
431 ; X86-NEXT: retl # encoding: [0xc3]
433 ; X64-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load_2:
435 ; X64-NEXT: vcvtps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x07]
436 ; X64-NEXT: retq # encoding: [0xc3]
437 %x0 = load <2 x float>, <2 x float>* %p
438 %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
439 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1)
; Load folding variant 2, merge-masked: undef-padded <2 x float> load with
; passthru %passthru and variable mask %mask. The expected code is a
; {%k1}-masked memory-operand vcvtps2uqq writing %xmm0.
443 define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2uqq_128_load_2(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) {
444 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load_2:
446 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
447 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
448 ; X86-NEXT: vcvtps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x00]
449 ; X86-NEXT: retl # encoding: [0xc3]
451 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load_2:
453 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
454 ; X64-NEXT: vcvtps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x07]
455 ; X64-NEXT: retq # encoding: [0xc3]
456 %x0 = load <2 x float>, <2 x float>* %p
457 %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
458 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask)
; Load folding variant 2, zero-masked: undef-padded <2 x float> load with a
; zeroinitializer passthru and variable mask %mask. The expected code is a
; memory-operand vcvtps2uqq with {%k1} {z}.
462 define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_2(<2 x float>* %p, i8 %mask) {
463 ; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_2:
465 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
466 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
467 ; X86-NEXT: vcvtps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x00]
468 ; X86-NEXT: retl # encoding: [0xc3]
470 ; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_2:
472 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
473 ; X64-NEXT: vcvtps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x07]
474 ; X64-NEXT: retq # encoding: [0xc3]
475 %x0 = load <2 x float>, <2 x float>* %p
476 %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
477 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask)
; Load folding variant 3, unmasked: a full <4 x float> load feeds the intrinsic
; directly (no shufflevector). The expected code is the memory-operand form of
; vcvtps2uqq.
481 define <2 x i64> @test_int_x86_avx512_cvt_ps2uqq_128_load_3(<4 x float>* %p) {
482 ; X86-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load_3:
484 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
485 ; X86-NEXT: vcvtps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x00]
486 ; X86-NEXT: retl # encoding: [0xc3]
488 ; X64-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load_3:
490 ; X64-NEXT: vcvtps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x07]
491 ; X64-NEXT: retq # encoding: [0xc3]
492 %x0 = load <4 x float>, <4 x float>* %p
493 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> undef, i8 -1)
; Load folding variant 3, merge-masked: full <4 x float> load with passthru
; %passthru and variable mask %mask. The expected code is a {%k1}-masked
; memory-operand vcvtps2uqq writing %xmm0.
497 define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2uqq_128_load_3(<4 x float>* %p, <2 x i64> %passthru, i8 %mask) {
498 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load_3:
500 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
501 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
502 ; X86-NEXT: vcvtps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x00]
503 ; X86-NEXT: retl # encoding: [0xc3]
505 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load_3:
507 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
508 ; X64-NEXT: vcvtps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x07]
509 ; X64-NEXT: retq # encoding: [0xc3]
510 %x0 = load <4 x float>, <4 x float>* %p
511 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> %passthru, i8 %mask)
; Load folding variant 3, zero-masked: full <4 x float> load with a
; zeroinitializer passthru and variable mask %mask. The expected code is a
; memory-operand vcvtps2uqq with {%k1} {z}.
515 define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_3(<4 x float>* %p, i8 %mask) {
516 ; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_3:
518 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
519 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
520 ; X86-NEXT: vcvtps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x00]
521 ; X86-NEXT: retl # encoding: [0xc3]
523 ; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_3:
525 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
526 ; X64-NEXT: vcvtps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x07]
527 ; X64-NEXT: retq # encoding: [0xc3]
528 %x0 = load <4 x float>, <4 x float>* %p
529 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask)
533 declare <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float>, <4 x i64>, i8)
; Unmasked 256-bit ps2uqq: <4 x float> in, <4 x i64> out, all-ones mask
; (i8 -1). Both runs expect vcvtps2uqq with an xmm source and a ymm destination.
535 define <4 x i64>@test_int_x86_avx512_cvt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1) {
536 ; CHECK-LABEL: test_int_x86_avx512_cvt_ps2uqq_256:
538 ; CHECK-NEXT: vcvtps2uqq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x79,0xc0]
539 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
540 %res = call <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
; Merge-masked 256-bit ps2uqq with a variable mask %x2. Expected sequence: mask
; into %k1 (stack load on i686, %edi on x86_64), {%k1}-masked vcvtps2uqq from
; %xmm0 into %ymm1, then a vmovaps copy into %ymm0.
544 define <4 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
545 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_256:
547 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
548 ; X86-NEXT: vcvtps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x79,0xc8]
549 ; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
550 ; X86-NEXT: retl # encoding: [0xc3]
552 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_256:
554 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
555 ; X64-NEXT: vcvtps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x79,0xc8]
556 ; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
557 ; X64-NEXT: retq # encoding: [0xc3]
558 %res = call <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
562 declare <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64>, <4 x float>, i8)
; Unmasked 128-bit qq2ps: all-ones mask (i8 -1). Both runs expect a plain
; vcvtqq2ps %xmm0, %xmm0.
; NOTE(review): "ask" in the function name looks like a typo. Sibling unmasked
; tests are named test_int_x86_avx512_cvt_<op>_<width>. Renaming would also
; require updating the label check below, so confirm before changing.
564 define <4 x float>@test_int_x86_avx512_ask_cvt_qq2ps_128(<2 x i64> %x0, <4 x float> %x1) {
565 ; CHECK-LABEL: test_int_x86_avx512_ask_cvt_qq2ps_128:
567 ; CHECK-NEXT: vcvtqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
568 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
569 %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
; Merge-masked 128-bit qq2ps with a variable mask %x2. Expected sequence: mask
; into %k1 (stack load on i686, %edi on x86_64), {%k1}-masked vcvtqq2ps writing
; %xmm1, then a vmovaps copy into %xmm0.
573 define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
574 ; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128:
576 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
577 ; X86-NEXT: vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
578 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
579 ; X86-NEXT: retl # encoding: [0xc3]
581 ; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128:
583 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
584 ; X64-NEXT: vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
585 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
586 ; X64-NEXT: retq # encoding: [0xc3]
587 %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
; Unmasked 128-bit qq2ps followed by a shufflevector that keeps result elements
; 0-1 and takes elements 2-3 from zeroinitializer. The expected code is only
; vcvtqq2ps and the return: no extra instruction is emitted for the shuffle.
591 define <4 x float>@test_int_x86_avx512_cvt_qq2ps_128_zext(<2 x i64> %x0, <4 x float> %x1) {
592 ; CHECK-LABEL: test_int_x86_avx512_cvt_qq2ps_128_zext:
594 ; CHECK-NEXT: vcvtqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
595 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
596 %res2 = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
597 %res3 = shufflevector <4 x float> %res2, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
598 ret <4 x float> %res3
; Merge-masked 128-bit qq2ps followed by a shufflevector that keeps result
; elements 0-1 and takes elements 2-3 from zeroinitializer. The expected code
; matches the plain masked test (kmov, {%k1}-masked vcvtqq2ps into %xmm1,
; vmovaps into %xmm0) with no extra instruction for the shuffle.
601 define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_128_zext(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
602 ; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128_zext:
604 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
605 ; X86-NEXT: vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
606 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
607 ; X86-NEXT: retl # encoding: [0xc3]
609 ; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128_zext:
611 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
612 ; X64-NEXT: vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
613 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
614 ; X64-NEXT: retq # encoding: [0xc3]
615 %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
616 %res1 = shufflevector <4 x float> %res, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
617 ret <4 x float> %res1
; Unmasked 256-bit qq2ps written with the generic sitofp instruction rather
; than an intrinsic call. Both runs expect vcvtqq2ps from %ymm0 into %xmm0,
; followed by vzeroupper before the return.
621 define <4 x float>@test_int_x86_avx512_cvt_qq2ps_256(<4 x i64> %x0, <4 x float> %x1) {
622 ; CHECK-LABEL: test_int_x86_avx512_cvt_qq2ps_256:
624 ; CHECK-NEXT: vcvtqq2ps %ymm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xc0]
625 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
626 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
627 %cvt = sitofp <4 x i64> %x0 to <4 x float>
; Merge-masked 256-bit qq2ps written in generic IR: sitofp, then the i8 mask is
; bitcast to <8 x i1>, its low four lanes are extracted, and a select picks
; between the converted value and %x1. The expected code is a {%k1}-masked
; vcvtqq2ps from %ymm0 into %xmm1, a vmovaps copy into %xmm0, and vzeroupper.
631 define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
632 ; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256:
634 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
635 ; X86-NEXT: vcvtqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8]
636 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
637 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
638 ; X86-NEXT: retl # encoding: [0xc3]
640 ; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256:
642 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
643 ; X64-NEXT: vcvtqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8]
644 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
645 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
646 ; X64-NEXT: retq # encoding: [0xc3]
647 %cvt1 = sitofp <4 x i64> %x0 to <4 x float>
648 %1 = bitcast i8 %x2 to <8 x i1>
649 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
650 %2 = select <4 x i1> %extract, <4 x float> %cvt1, <4 x float> %x1
654 declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double>, <2 x i64>, i8)
; Unmasked case: the cvttpd2qq.128 intrinsic is called with an all-ones mask
; (i8 -1). Both targets are expected to emit vcvttpd2qq %xmm0, %xmm0 with no
; write-mask operand.
656 define <2 x i64>@test_int_x86_avx512_cvtt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1) {
657 ; CHECK-LABEL: test_int_x86_avx512_cvtt_pd2qq_128:
659 ; CHECK-NEXT: vcvttpd2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x7a,0xc0]
660 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
661 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
; Masked case: the cvttpd2qq.128 intrinsic receives %x1 as its second operand
; and %x2 as the i8 mask. The expected code loads the mask into %k1 (kmovb from
; the stack on i686, kmovw from %edi on x86_64), runs vcvttpd2qq into %xmm1
; under {%k1}, then copies %xmm1 to %xmm0.
665 define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
666 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_128:
668 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
669 ; X86-NEXT: vcvttpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7a,0xc8]
670 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
671 ; X86-NEXT: retl # encoding: [0xc3]
673 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_128:
675 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
676 ; X64-NEXT: vcvttpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7a,0xc8]
677 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
678 ; X64-NEXT: retq # encoding: [0xc3]
679 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
683 declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double>, <4 x i64>, i8)
; Unmasked case: the cvttpd2qq.256 intrinsic is called with an all-ones mask
; (i8 -1). Both targets are expected to emit vcvttpd2qq %ymm0, %ymm0 with no
; write-mask operand.
685 define <4 x i64>@test_int_x86_avx512_cvtt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1) {
686 ; CHECK-LABEL: test_int_x86_avx512_cvtt_pd2qq_256:
688 ; CHECK-NEXT: vcvttpd2qq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x7a,0xc0]
689 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
690 %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
; Masked case: the cvttpd2qq.256 intrinsic receives %x1 as its second operand
; and %x2 as the i8 mask. The expected code loads the mask into %k1 (kmovb from
; the stack on i686, kmovw from %edi on x86_64), runs vcvttpd2qq into %ymm1
; under {%k1}, then copies %ymm1 to %ymm0.
694 define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
695 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_256:
697 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
698 ; X86-NEXT: vcvttpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7a,0xc8]
699 ; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
700 ; X86-NEXT: retl # encoding: [0xc3]
702 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_256:
704 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
705 ; X64-NEXT: vcvttpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7a,0xc8]
706 ; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
707 ; X64-NEXT: retq # encoding: [0xc3]
708 %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
712 declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double>, <2 x i64>, i8)
; Unmasked case: the cvttpd2uqq.128 intrinsic is called with an all-ones mask
; (i8 -1). Both targets are expected to emit vcvttpd2uqq %xmm0, %xmm0 with no
; write-mask operand.
714 define <2 x i64>@test_int_x86_avx512_cvtt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1) {
715 ; CHECK-LABEL: test_int_x86_avx512_cvtt_pd2uqq_128:
717 ; CHECK-NEXT: vcvttpd2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x78,0xc0]
718 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
719 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
; Masked case: the cvttpd2uqq.128 intrinsic receives %x1 as its second operand
; and %x2 as the i8 mask. The expected code loads the mask into %k1 (kmovb from
; the stack on i686, kmovw from %edi on x86_64), runs vcvttpd2uqq into %xmm1
; under {%k1}, then copies %xmm1 to %xmm0.
723 define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
724 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_128:
726 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
727 ; X86-NEXT: vcvttpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x78,0xc8]
728 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
729 ; X86-NEXT: retl # encoding: [0xc3]
731 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_128:
733 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
734 ; X64-NEXT: vcvttpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x78,0xc8]
735 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
736 ; X64-NEXT: retq # encoding: [0xc3]
737 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
741 declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double>, <4 x i64>, i8)
; Unmasked case: the cvttpd2uqq.256 intrinsic is called with an all-ones mask
; (i8 -1). Both targets are expected to emit vcvttpd2uqq %ymm0, %ymm0 with no
; write-mask operand.
743 define <4 x i64>@test_int_x86_avx512_cvtt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1) {
744 ; CHECK-LABEL: test_int_x86_avx512_cvtt_pd2uqq_256:
746 ; CHECK-NEXT: vcvttpd2uqq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x78,0xc0]
747 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
748 %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
; Masked case: the cvttpd2uqq.256 intrinsic receives %x1 as its second operand
; and %x2 as the i8 mask. The expected code loads the mask into %k1 (kmovb from
; the stack on i686, kmovw from %edi on x86_64), runs vcvttpd2uqq into %ymm1
; under {%k1}, then copies %ymm1 to %ymm0.
752 define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
753 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_256:
755 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
756 ; X86-NEXT: vcvttpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x78,0xc8]
757 ; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
758 ; X86-NEXT: retl # encoding: [0xc3]
760 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_256:
762 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
763 ; X64-NEXT: vcvttpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x78,0xc8]
764 ; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
765 ; X64-NEXT: retq # encoding: [0xc3]
766 %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
770 declare <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float>, <2 x i64>, i8)
; Unmasked case: the cvttps2qq.128 intrinsic is called with an all-ones mask
; (i8 -1). Both targets are expected to emit vcvttps2qq %xmm0, %xmm0 with no
; write-mask operand.
772 define <2 x i64>@test_int_x86_avx512_cvtt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1) {
773 ; CHECK-LABEL: test_int_x86_avx512_cvtt_ps2qq_128:
775 ; CHECK-NEXT: vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
776 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
777 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
; Masked case: the cvttps2qq.128 intrinsic receives %x1 as its second operand
; and %x2 as the i8 mask. The expected code loads the mask into %k1 (kmovb from
; the stack on i686, kmovw from %edi on x86_64), runs vcvttps2qq into %xmm1
; under {%k1}, then copies %xmm1 to %xmm0.
781 define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
782 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128:
784 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
785 ; X86-NEXT: vcvttps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc8]
786 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
787 ; X86-NEXT: retl # encoding: [0xc3]
789 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128:
791 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
792 ; X64-NEXT: vcvttps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc8]
793 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
794 ; X64-NEXT: retq # encoding: [0xc3]
795 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
; Load folding with a zero-padded source: a <2 x float> load is widened to
; <4 x float> by a shufflevector whose upper two lanes come from
; zeroinitializer, then converted with an all-ones mask and an undef second
; operand. The expected code uses the pointer directly as the memory operand of
; vcvttps2qq ((%eax) on i686 after loading the argument, (%rdi) on x86_64).
799 define <2 x i64> @test_int_x86_avx512_cvtt_ps2qq_128_load(<2 x float>* %p) {
800 ; X86-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load:
802 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
803 ; X86-NEXT: vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00]
804 ; X86-NEXT: retl # encoding: [0xc3]
806 ; X64-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load:
808 ; X64-NEXT: vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07]
809 ; X64-NEXT: retq # encoding: [0xc3]
810 %x0 = load <2 x float>, <2 x float>* %p
811 %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
812 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1)
; Merge-masked load folding with a zero-padded source: a <2 x float> load is
; widened to <4 x float> with zeroinitializer in the upper two lanes, then
; converted with %passthru as the second operand and %mask as the i8 mask.
; The expected code loads the mask into %k1 and issues vcvttps2qq from memory
; into %xmm0 under {%k1}, with no separate load instruction.
816 define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2qq_128_load(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) {
817 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load:
819 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
820 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
821 ; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00]
822 ; X86-NEXT: retl # encoding: [0xc3]
824 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load:
826 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
827 ; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07]
828 ; X64-NEXT: retq # encoding: [0xc3]
829 %x0 = load <2 x float>, <2 x float>* %p
830 %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
831 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask)
; Zero-masked load folding with a zero-padded source: a <2 x float> load is
; widened to <4 x float> with zeroinitializer in the upper two lanes, then
; converted with zeroinitializer as the second operand and %mask as the i8
; mask. The expected code loads the mask into %k1 and issues vcvttps2qq from
; memory under {%k1} {z}.
835 define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2qq_128_load(<2 x float>* %p, i8 %mask) {
836 ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load:
838 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
839 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
840 ; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00]
841 ; X86-NEXT: retl # encoding: [0xc3]
843 ; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load:
845 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
846 ; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07]
847 ; X64-NEXT: retq # encoding: [0xc3]
848 %x0 = load <2 x float>, <2 x float>* %p
849 %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
850 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask)
; Load folding with an undef-padded source: a <2 x float> load is widened to
; <4 x float> by a shufflevector whose upper two lanes come from undef, then
; converted with an all-ones mask and an undef second operand. The expected
; code uses the pointer directly as the memory operand of vcvttps2qq.
855 define <2 x i64> @test_int_x86_avx512_cvtt_ps2qq_128_load_2(<2 x float>* %p) {
856 ; X86-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load_2:
858 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
859 ; X86-NEXT: vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00]
860 ; X86-NEXT: retl # encoding: [0xc3]
862 ; X64-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load_2:
864 ; X64-NEXT: vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07]
865 ; X64-NEXT: retq # encoding: [0xc3]
866 %x0 = load <2 x float>, <2 x float>* %p
867 %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
868 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1)
; Merge-masked load folding with an undef-padded source: a <2 x float> load is
; widened to <4 x float> with undef in the upper two lanes, then converted with
; %passthru as the second operand and %mask as the i8 mask. The expected code
; loads the mask into %k1 and issues vcvttps2qq from memory into %xmm0 under
; {%k1}.
872 define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) {
873 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2:
875 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
876 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
877 ; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00]
878 ; X86-NEXT: retl # encoding: [0xc3]
880 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2:
882 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
883 ; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07]
884 ; X64-NEXT: retq # encoding: [0xc3]
885 %x0 = load <2 x float>, <2 x float>* %p
886 %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
887 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask)
; Zero-masked load folding with an undef-padded source: a <2 x float> load is
; widened to <4 x float> with undef in the upper two lanes, then converted with
; zeroinitializer as the second operand and %mask as the i8 mask. The expected
; code loads the mask into %k1 and issues vcvttps2qq from memory under
; {%k1} {z}.
891 define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2(<2 x float>* %p, i8 %mask) {
892 ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2:
894 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
895 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
896 ; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00]
897 ; X86-NEXT: retl # encoding: [0xc3]
899 ; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2:
901 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
902 ; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07]
903 ; X64-NEXT: retq # encoding: [0xc3]
904 %x0 = load <2 x float>, <2 x float>* %p
905 %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
906 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask)
; Load folding with a full-width source: a whole <4 x float> is loaded and fed
; straight to the cvttps2qq.128 intrinsic with an all-ones mask and an undef
; second operand. The expected code uses the pointer directly as the memory
; operand of vcvttps2qq.
910 define <2 x i64> @test_int_x86_avx512_cvtt_ps2qq_128_load_3(<4 x float>* %p) {
911 ; X86-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load_3:
913 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
914 ; X86-NEXT: vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00]
915 ; X86-NEXT: retl # encoding: [0xc3]
917 ; X64-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load_3:
919 ; X64-NEXT: vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07]
920 ; X64-NEXT: retq # encoding: [0xc3]
921 %x0 = load <4 x float>, <4 x float>* %p
922 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> undef, i8 -1)
; Merge-masked load folding with a full-width source: a whole <4 x float> is
; loaded and converted with %passthru as the second operand and %mask as the
; i8 mask. The expected code loads the mask into %k1 and issues vcvttps2qq
; from memory into %xmm0 under {%k1}.
926 define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2qq_128_load_3(<4 x float>* %p, <2 x i64> %passthru, i8 %mask) {
927 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_3:
929 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
930 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
931 ; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00]
932 ; X86-NEXT: retl # encoding: [0xc3]
934 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_3:
936 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
937 ; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07]
938 ; X64-NEXT: retq # encoding: [0xc3]
939 %x0 = load <4 x float>, <4 x float>* %p
940 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %passthru, i8 %mask)
; Zero-masked load folding with a full-width source: a whole <4 x float> is
; loaded and converted with zeroinitializer as the second operand and %mask as
; the i8 mask. The expected code loads the mask into %k1 and issues vcvttps2qq
; from memory under {%k1} {z}.
944 define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_3(<4 x float>* %p, i8 %mask) {
945 ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_3:
947 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
948 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
949 ; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00]
950 ; X86-NEXT: retl # encoding: [0xc3]
952 ; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_3:
954 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
955 ; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07]
956 ; X64-NEXT: retq # encoding: [0xc3]
957 %x0 = load <4 x float>, <4 x float>* %p
958 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask)
962 declare <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float>, <4 x i64>, i8)
; Unmasked case: the cvttps2qq.256 intrinsic is called with an all-ones mask
; (i8 -1). Both targets are expected to emit vcvttps2qq %xmm0, %ymm0 (128-bit
; source register, 256-bit destination) with no write-mask operand.
964 define <4 x i64>@test_int_x86_avx512_cvtt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1) {
965 ; CHECK-LABEL: test_int_x86_avx512_cvtt_ps2qq_256:
967 ; CHECK-NEXT: vcvttps2qq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x7a,0xc0]
968 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
969 %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
; Masked case: the cvttps2qq.256 intrinsic receives %x1 as its second operand
; and %x2 as the i8 mask. The expected code loads the mask into %k1 (kmovb from
; the stack on i686, kmovw from %edi on x86_64), runs vcvttps2qq from %xmm0
; into %ymm1 under {%k1}, then copies %ymm1 to %ymm0.
973 define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
974 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_256:
976 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
977 ; X86-NEXT: vcvttps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7a,0xc8]
978 ; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
979 ; X86-NEXT: retl # encoding: [0xc3]
981 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_256:
983 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
984 ; X64-NEXT: vcvttps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7a,0xc8]
985 ; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
986 ; X64-NEXT: retq # encoding: [0xc3]
987 %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
991 declare <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64>, <4 x float>, i8)
; Unmasked case: the cvtuqq2ps.128 intrinsic is called with an all-ones mask
; (i8 -1). Both targets are expected to emit vcvtuqq2ps %xmm0, %xmm0 with no
; write-mask operand.
993 define <4 x float>@test_int_x86_avx512_cvt_uqq2ps_128(<2 x i64> %x0, <4 x float> %x1) {
994 ; CHECK-LABEL: test_int_x86_avx512_cvt_uqq2ps_128:
996 ; CHECK-NEXT: vcvtuqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
997 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
998 %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
; Masked case: the cvtuqq2ps.128 intrinsic receives %x1 as its second operand
; and %x2 as the i8 mask. The expected code loads the mask into %k1 (kmovb from
; the stack on i686, kmovw from %edi on x86_64), runs vcvtuqq2ps into %xmm1
; under {%k1}, then copies %xmm1 to %xmm0.
1002 define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
1003 ; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128:
1005 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1006 ; X86-NEXT: vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
1007 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1008 ; X86-NEXT: retl # encoding: [0xc3]
1010 ; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128:
1012 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1013 ; X64-NEXT: vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
1014 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1015 ; X64-NEXT: retq # encoding: [0xc3]
1016 %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
1017 ret <4 x float> %res
; Unmasked conversion followed by an explicit zeroing of the upper half: the
; shufflevector keeps lanes 0-1 of the intrinsic result and takes lanes 2-3
; from zeroinitializer (indices 4 and 5). The expected code is still a single
; vcvtuqq2ps %xmm0, %xmm0, with no extra instruction for the zeroing.
1020 define <4 x float>@test_int_x86_avx512_cvt_uqq2ps_128_zext(<2 x i64> %x0, <4 x float> %x1) {
1021 ; CHECK-LABEL: test_int_x86_avx512_cvt_uqq2ps_128_zext:
1023 ; CHECK-NEXT: vcvtuqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
1024 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1025 %res2 = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
1026 %res3 = shufflevector <4 x float> %res2, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1027 ret <4 x float> %res3
; Masked conversion followed by an explicit zeroing of the upper half: the
; shufflevector keeps lanes 0-1 of the masked intrinsic result and takes lanes
; 2-3 from zeroinitializer (indices 4 and 5). The expected code matches the
; plain masked form (mask load into %k1, vcvtuqq2ps into %xmm1 under {%k1},
; copy to %xmm0), with no extra instruction for the zeroing.
1030 define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_128_zext(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
1031 ; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128_zext:
1033 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1034 ; X86-NEXT: vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
1035 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1036 ; X86-NEXT: retl # encoding: [0xc3]
1038 ; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128_zext:
1040 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1041 ; X64-NEXT: vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
1042 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1043 ; X64-NEXT: retq # encoding: [0xc3]
1044 %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
1045 %res1 = shufflevector <4 x float> %res, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1046 ret <4 x float> %res1
1049 declare <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64>, <4 x float>, i8)
; Unmasked unsigned i64 -> float conversion of a 256-bit source, written as a
; plain uitofp instead of an intrinsic call. Both targets are expected to
; select vcvtuqq2ps (%ymm0 -> %xmm0) and issue vzeroupper before returning.
1051 define <4 x float>@test_int_x86_avx512_cvt_uqq2ps_256(<4 x i64> %x0) {
1052 ; CHECK-LABEL: test_int_x86_avx512_cvt_uqq2ps_256:
1054 ; CHECK-NEXT: vcvtuqq2ps %ymm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x28,0x7a,0xc0]
1055 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1056 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1057 %cvt = uitofp <4 x i64> %x0 to <4 x float>
1058 ret <4 x float> %cvt
; Masked unsigned i64 -> float conversion of a 256-bit source in generic IR:
; the i8 mask %x2 is bitcast to <8 x i1>, its low four lanes are extracted,
; and a select chooses between the uitofp result and %x1. The expected code
; loads the mask into %k1 (kmovb from the stack on i686, kmovw from %edi on
; x86_64), converts into %xmm1 under {%k1}, then copies %xmm1 to %xmm0.
1061 define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
1062 ; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
1064 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1065 ; X86-NEXT: vcvtuqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8]
1066 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1067 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1068 ; X86-NEXT: retl # encoding: [0xc3]
1070 ; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
1072 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1073 ; X64-NEXT: vcvtuqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8]
1074 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1075 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1076 ; X64-NEXT: retq # encoding: [0xc3]
1077 %cvt = uitofp <4 x i64> %x0 to <4 x float>
1078 %1 = bitcast i8 %x2 to <8 x i1>
1079 %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1080 %2 = select <4 x i1> %extract, <4 x float> %cvt, <4 x float> %x1
1084 declare <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float>, <2 x i64>, i8)
; Unmasked case: the cvttps2uqq.128 intrinsic is called with an all-ones mask
; (i8 -1). Both targets are expected to emit vcvttps2uqq %xmm0, %xmm0 with no
; write-mask operand.
1086 define <2 x i64>@test_int_x86_avx512_cvtt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1) {
1087 ; CHECK-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128:
1089 ; CHECK-NEXT: vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
1090 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1091 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
; Masked case: the cvttps2uqq.128 intrinsic receives %x1 as its second operand
; and %x2 as the i8 mask. The expected code loads the mask into %k1 (kmovb from
; the stack on i686, kmovw from %edi on x86_64), runs vcvttps2uqq into %xmm1
; under {%k1}, then copies %xmm1 to %xmm0.
1095 define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
1096 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128:
1098 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1099 ; X86-NEXT: vcvttps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc8]
1100 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1101 ; X86-NEXT: retl # encoding: [0xc3]
1103 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128:
1105 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1106 ; X64-NEXT: vcvttps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc8]
1107 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
1108 ; X64-NEXT: retq # encoding: [0xc3]
1109 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
; Load folding with a zero-padded source: a <2 x float> load is widened to
; <4 x float> by a shufflevector whose upper two lanes come from
; zeroinitializer, then converted with an all-ones mask and an undef second
; operand. The expected code uses the pointer directly as the memory operand of
; vcvttps2uqq ((%eax) on i686 after loading the argument, (%rdi) on x86_64).
1113 define <2 x i64> @test_int_x86_avx512_cvtt_ps2uqq_128_load(<2 x float>* %p) {
1114 ; X86-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load:
1116 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1117 ; X86-NEXT: vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00]
1118 ; X86-NEXT: retl # encoding: [0xc3]
1120 ; X64-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load:
1122 ; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07]
1123 ; X64-NEXT: retq # encoding: [0xc3]
1124 %x0 = load <2 x float>, <2 x float>* %p
1125 %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1126 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1)
; Merge-masked load folding with a zero-padded source: a <2 x float> load is
; widened to <4 x float> with zeroinitializer in the upper two lanes, then
; converted with %passthru as the second operand and %mask as the i8 mask.
; The expected code loads the mask into %k1 and issues vcvttps2uqq from memory
; into %xmm0 under {%k1}, with no separate load instruction.
1130 define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2uqq_128_load(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) {
1131 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load:
1133 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1134 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1135 ; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00]
1136 ; X86-NEXT: retl # encoding: [0xc3]
1138 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load:
1140 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1141 ; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07]
1142 ; X64-NEXT: retq # encoding: [0xc3]
1143 %x0 = load <2 x float>, <2 x float>* %p
1144 %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1145 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask)
; Zero-masked load folding with a zero-padded source: a <2 x float> load is
; widened to <4 x float> with zeroinitializer in the upper two lanes, then
; converted with zeroinitializer as the second operand and %mask as the i8
; mask. The expected code loads the mask into %k1 and issues vcvttps2uqq from
; memory under {%k1} {z}.
1149 define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load(<2 x float>* %p, i8 %mask) {
1150 ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load:
1152 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1153 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1154 ; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00]
1155 ; X86-NEXT: retl # encoding: [0xc3]
1157 ; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load:
1159 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1160 ; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07]
1161 ; X64-NEXT: retq # encoding: [0xc3]
1162 %x0 = load <2 x float>, <2 x float>* %p
1163 %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1164 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask)
; Load folding with an undef-padded source: a <2 x float> load is widened to
; <4 x float> by a shufflevector whose upper two lanes come from undef, then
; converted with an all-ones mask and an undef second operand. The expected
; code uses the pointer directly as the memory operand of vcvttps2uqq.
1169 define <2 x i64> @test_int_x86_avx512_cvtt_ps2uqq_128_load_2(<2 x float>* %p) {
1170 ; X86-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load_2:
1172 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1173 ; X86-NEXT: vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00]
1174 ; X86-NEXT: retl # encoding: [0xc3]
1176 ; X64-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load_2:
1178 ; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07]
1179 ; X64-NEXT: retq # encoding: [0xc3]
1180 %x0 = load <2 x float>, <2 x float>* %p
1181 %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1182 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1)
; Merge-masked load folding with an undef-padded source: a <2 x float> load is
; widened to <4 x float> with undef in the upper two lanes, then converted with
; %passthru as the second operand and %mask as the i8 mask. The expected code
; loads the mask into %k1 and issues vcvttps2uqq from memory into %xmm0 under
; {%k1}.
1186 define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) {
1187 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2:
1189 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1190 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1191 ; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00]
1192 ; X86-NEXT: retl # encoding: [0xc3]
1194 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2:
1196 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1197 ; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07]
1198 ; X64-NEXT: retq # encoding: [0xc3]
1199 %x0 = load <2 x float>, <2 x float>* %p
1200 %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1201 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask)
; Zero-masked load folding with an undef-padded source: a <2 x float> load is
; widened to <4 x float> with undef in the upper two lanes, then converted with
; zeroinitializer as the second operand and %mask as the i8 mask. The expected
; code loads the mask into %k1 and issues vcvttps2uqq from memory under
; {%k1} {z}.
1205 define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2(<2 x float>* %p, i8 %mask) {
1206 ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2:
1208 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1209 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1210 ; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00]
1211 ; X86-NEXT: retl # encoding: [0xc3]
1213 ; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2:
1215 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1216 ; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07]
1217 ; X64-NEXT: retq # encoding: [0xc3]
1218 %x0 = load <2 x float>, <2 x float>* %p
1219 %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1220 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask)
; Load folding with a full-width source: a whole <4 x float> is loaded and fed
; straight to the cvttps2uqq.128 intrinsic with an all-ones mask and an undef
; second operand. The expected code uses the pointer directly as the memory
; operand of vcvttps2uqq.
1224 define <2 x i64> @test_int_x86_avx512_cvtt_ps2uqq_128_load_3(<4 x float>* %p) {
1225 ; X86-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load_3:
1227 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1228 ; X86-NEXT: vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00]
1229 ; X86-NEXT: retl # encoding: [0xc3]
1231 ; X64-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load_3:
1233 ; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07]
1234 ; X64-NEXT: retq # encoding: [0xc3]
1235 %x0 = load <4 x float>, <4 x float>* %p
1236 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> undef, i8 -1)
; Merge-masked load folding with a full-width source: a whole <4 x float> is
; loaded and converted with %passthru as the second operand and %mask as the
; i8 mask. The expected code loads the mask into %k1 and issues vcvttps2uqq
; from memory into %xmm0 under {%k1}.
1240 define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_3(<4 x float>* %p, <2 x i64> %passthru, i8 %mask) {
1241 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_3:
1243 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1244 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1245 ; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00]
1246 ; X86-NEXT: retl # encoding: [0xc3]
1248 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_3:
1250 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1251 ; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07]
1252 ; X64-NEXT: retq # encoding: [0xc3]
1253 %x0 = load <4 x float>, <4 x float>* %p
1254 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %passthru, i8 %mask)
; Zero-masked load folding with a full-width source: a whole <4 x float> is
; loaded and converted with zeroinitializer as the second operand and %mask as
; the i8 mask. The expected code loads the mask into %k1 and issues
; vcvttps2uqq from memory under {%k1} {z}.
1258 define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_3(<4 x float>* %p, i8 %mask) {
1259 ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_3:
1261 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1262 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
1263 ; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00]
1264 ; X86-NEXT: retl # encoding: [0xc3]
1266 ; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_3:
1268 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1269 ; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07]
1270 ; X64-NEXT: retq # encoding: [0xc3]
1271 %x0 = load <4 x float>, <4 x float>* %p
1272 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask)
1276 declare <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float>, <4 x i64>, i8)
; Unmasked case: the cvttps2uqq.256 intrinsic is called with an all-ones mask
; (i8 -1). Both targets are expected to emit vcvttps2uqq %xmm0, %ymm0 (128-bit
; source register, 256-bit destination) with no write-mask operand.
1278 define <4 x i64>@test_int_x86_avx512_cvtt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1) {
1279 ; CHECK-LABEL: test_int_x86_avx512_cvtt_ps2uqq_256:
1281 ; CHECK-NEXT: vcvttps2uqq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x78,0xc0]
1282 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1283 %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
; Masked case: the cvttps2uqq.256 intrinsic receives %x1 as its second operand
; and %x2 as the i8 mask. The expected code loads the mask into %k1 (kmovb from
; the stack on i686, kmovw from %edi on x86_64), runs vcvttps2uqq from %xmm0
; into %ymm1 under {%k1}, then copies %ymm1 to %ymm0.
1287 define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
1288 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_256:
1290 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1291 ; X86-NEXT: vcvttps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x78,0xc8]
1292 ; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
1293 ; X86-NEXT: retl # encoding: [0xc3]
1295 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_256:
1297 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1298 ; X64-NEXT: vcvttps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x78,0xc8]
1299 ; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
1300 ; X64-NEXT: retq # encoding: [0xc3]
1301 %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
; Masked 128-bit reduce.pd intrinsic: operands are a <2 x double> source, an
; i32 immediate, a second <2 x double> operand and an i8 mask.
1305 declare <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double>, i32, <2 x double>, i8)
; Calls the intrinsic twice on %x0 and adds the two results: once with
; immediate 4 under the variable mask %x3, once with immediate 8 under an
; all-ones mask. Expected code: a vreducepd $4 written under {%k1} into %xmm1,
; an unmasked vreducepd $8 into %xmm0, and a vaddpd combining them.
1307 define <2 x double>@test_int_x86_avx512_mask_reduce_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
1308 ; X86-LABEL: test_int_x86_avx512_mask_reduce_pd_128:
1310 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1311 ; X86-NEXT: vreducepd $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x56,0xc8,0x04]
1312 ; X86-NEXT: vreducepd $8, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x56,0xc0,0x08]
1313 ; X86-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
1314 ; X86-NEXT: retl # encoding: [0xc3]
1316 ; X64-LABEL: test_int_x86_avx512_mask_reduce_pd_128:
1318 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1319 ; X64-NEXT: vreducepd $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x56,0xc8,0x04]
1320 ; X64-NEXT: vreducepd $8, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x56,0xc0,0x08]
1321 ; X64-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
1322 ; X64-NEXT: retq # encoding: [0xc3]
; Masked form: immediate 4, mask taken from %x3.
1323 %res = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3)
; Unmasked form: immediate 8, all-ones mask.
1324 %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %x0, i32 8, <2 x double> %x2, i8 -1)
; The sum keeps both calls live so that both forms show up in the output.
1325 %res2 = fadd <2 x double> %res, %res1
1326 ret <2 x double> %res2
; Masked 256-bit reduce.pd intrinsic: operands are a <4 x double> source, an
; i32 immediate, a second <4 x double> operand and an i8 mask.
1329 declare <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double>, i32, <4 x double>, i8)
; Calls the intrinsic twice on %x0 and adds the two results: once with
; immediate 4 under the variable mask %x3, once with immediate 0 under an
; all-ones mask. Expected code: a vreducepd $4 written under {%k1} into %ymm1,
; an unmasked vreducepd $0 into %ymm0, and a vaddpd combining them.
1331 define <4 x double>@test_int_x86_avx512_mask_reduce_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
1332 ; X86-LABEL: test_int_x86_avx512_mask_reduce_pd_256:
1334 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1335 ; X86-NEXT: vreducepd $4, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x56,0xc8,0x04]
1336 ; X86-NEXT: vreducepd $0, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x56,0xc0,0x00]
1337 ; X86-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
1338 ; X86-NEXT: retl # encoding: [0xc3]
1340 ; X64-LABEL: test_int_x86_avx512_mask_reduce_pd_256:
1342 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1343 ; X64-NEXT: vreducepd $4, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x56,0xc8,0x04]
1344 ; X64-NEXT: vreducepd $0, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x56,0xc0,0x00]
1345 ; X64-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
1346 ; X64-NEXT: retq # encoding: [0xc3]
; Masked form: immediate 4, mask taken from %x3.
1347 %res = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3)
; Unmasked form: immediate 0, all-ones mask.
1348 %res1 = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %x0, i32 0, <4 x double> %x2, i8 -1)
; The sum keeps both calls live so that both forms show up in the output.
1349 %res2 = fadd <4 x double> %res, %res1
1350 ret <4 x double> %res2
; Masked 128-bit reduce.ps intrinsic: operands are a <4 x float> source, an
; i32 immediate, a second <4 x float> operand and an i8 mask.
1353 declare <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float>, i32, <4 x float>, i8)
; Calls the intrinsic twice on %x0 and adds the two results: once with
; immediate 4 under the variable mask %x3, once with immediate 88 under an
; all-ones mask. Expected code: a vreduceps $4 written under {%k1} into %xmm1,
; an unmasked vreduceps $88 into %xmm0, and a vaddps combining them.
1355 define <4 x float>@test_int_x86_avx512_mask_reduce_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
1356 ; X86-LABEL: test_int_x86_avx512_mask_reduce_ps_128:
1358 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1359 ; X86-NEXT: vreduceps $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x56,0xc8,0x04]
1360 ; X86-NEXT: vreduceps $88, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x56,0xc0,0x58]
1361 ; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
1362 ; X86-NEXT: retl # encoding: [0xc3]
1364 ; X64-LABEL: test_int_x86_avx512_mask_reduce_ps_128:
1366 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1367 ; X64-NEXT: vreduceps $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x56,0xc8,0x04]
1368 ; X64-NEXT: vreduceps $88, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x56,0xc0,0x58]
1369 ; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
1370 ; X64-NEXT: retq # encoding: [0xc3]
; Masked form: immediate 4, mask taken from %x3.
1371 %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 %x3)
; Unmasked form: immediate 88, all-ones mask.
1372 %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 -1)
; The sum keeps both calls live so that both forms show up in the output.
1373 %res2 = fadd <4 x float> %res, %res1
1374 ret <4 x float> %res2
; Masked 256-bit reduce.ps intrinsic: operands are an <8 x float> source, an
; i32 immediate, a second <8 x float> operand and an i8 mask.
1377 declare <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float>, i32, <8 x float>, i8)
; Calls the intrinsic twice on %x0 and adds the two results: once with
; immediate 11 under the variable mask %x3, once with immediate 12 under an
; all-ones mask. Expected code: a vreduceps $11 written under {%k1} into %ymm1,
; an unmasked vreduceps $12 into %ymm0, and a vaddps combining them.
1379 define <8 x float>@test_int_x86_avx512_mask_reduce_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
1380 ; X86-LABEL: test_int_x86_avx512_mask_reduce_ps_256:
1382 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1383 ; X86-NEXT: vreduceps $11, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x56,0xc8,0x0b]
1384 ; X86-NEXT: vreduceps $12, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x56,0xc0,0x0c]
1385 ; X86-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
1386 ; X86-NEXT: retl # encoding: [0xc3]
1388 ; X64-LABEL: test_int_x86_avx512_mask_reduce_ps_256:
1390 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1391 ; X64-NEXT: vreduceps $11, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x56,0xc8,0x0b]
1392 ; X64-NEXT: vreduceps $12, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x56,0xc0,0x0c]
1393 ; X64-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
1394 ; X64-NEXT: retq # encoding: [0xc3]
; Masked form: immediate 11, mask taken from %x3.
1395 %res = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 %x3)
; Unmasked form: immediate 12, all-ones mask.
1396 %res1 = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %x0, i32 12, <8 x float> %x2, i8 -1)
; The sum keeps both calls live so that both forms show up in the output.
1397 %res2 = fadd <8 x float> %res, %res1
1398 ret <8 x float> %res2
; Masked 128-bit range.pd intrinsic: operands are two <2 x double> sources, an
; i32 immediate, a third <2 x double> operand and an i8 mask.
1401 declare <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8)
; Calls the intrinsic twice on (%x0, %x1) and adds the two results: once with
; immediate 4 under the variable mask %x4, once with immediate 8 under an
; all-ones mask. Expected code: a vrangepd $4 written under {%k1} into %xmm2,
; an unmasked vrangepd $8 into %xmm0, and a vaddpd combining them.
1403 define <2 x double>@test_int_x86_avx512_mask_range_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
1404 ; X86-LABEL: test_int_x86_avx512_mask_range_pd_128:
1406 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1407 ; X86-NEXT: vrangepd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x50,0xd1,0x04]
1408 ; X86-NEXT: vrangepd $8, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x50,0xc1,0x08]
1409 ; X86-NEXT: vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
1410 ; X86-NEXT: retl # encoding: [0xc3]
1412 ; X64-LABEL: test_int_x86_avx512_mask_range_pd_128:
1414 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1415 ; X64-NEXT: vrangepd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x50,0xd1,0x04]
1416 ; X64-NEXT: vrangepd $8, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x50,0xc1,0x08]
1417 ; X64-NEXT: vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
1418 ; X64-NEXT: retq # encoding: [0xc3]
; Masked form: immediate 4, mask taken from %x4.
1419 %res = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %x0, <2 x double> %x1, i32 4, <2 x double> %x3, i8 %x4)
; Unmasked form: immediate 8, all-ones mask.
1420 %res1 = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %x0, <2 x double> %x1, i32 8, <2 x double> %x3, i8 -1)
; The sum keeps both calls live so that both forms show up in the output.
1421 %res2 = fadd <2 x double> %res, %res1
1422 ret <2 x double> %res2
; Masked 256-bit range.pd intrinsic: operands are two <4 x double> sources, an
; i32 immediate, a third <4 x double> operand and an i8 mask.
1425 declare <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)
; Calls the intrinsic twice on (%x0, %x1) and adds the two results: once with
; immediate 4 under the variable mask %x4, once with immediate 88 under an
; all-ones mask. Expected code: a vrangepd $4 written under {%k1} into %ymm2,
; an unmasked vrangepd $88 into %ymm0, and a vaddpd combining them.
1427 define <4 x double>@test_int_x86_avx512_mask_range_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
1428 ; X86-LABEL: test_int_x86_avx512_mask_range_pd_256:
1430 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1431 ; X86-NEXT: vrangepd $4, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x50,0xd1,0x04]
1432 ; X86-NEXT: vrangepd $88, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x50,0xc1,0x58]
1433 ; X86-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
1434 ; X86-NEXT: retl # encoding: [0xc3]
1436 ; X64-LABEL: test_int_x86_avx512_mask_range_pd_256:
1438 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1439 ; X64-NEXT: vrangepd $4, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x50,0xd1,0x04]
1440 ; X64-NEXT: vrangepd $88, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x50,0xc1,0x58]
1441 ; X64-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
1442 ; X64-NEXT: retq # encoding: [0xc3]
; Masked form: immediate 4, mask taken from %x4.
1443 %res = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %x0, <4 x double> %x1, i32 4, <4 x double> %x3, i8 %x4)
; Unmasked form: immediate 88, all-ones mask.
1444 %res1 = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %x0, <4 x double> %x1, i32 88, <4 x double> %x3, i8 -1)
; The sum keeps both calls live so that both forms show up in the output.
1445 %res2 = fadd <4 x double> %res, %res1
1446 ret <4 x double> %res2
; Masked 128-bit range.ps intrinsic: operands are two <4 x float> sources, an
; i32 immediate, a third <4 x float> operand and an i8 mask.
1449 declare <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8)
; Calls the intrinsic twice on (%x0, %x1) and adds the two results: once with
; immediate 4 under the variable mask %x4, once with immediate 88 under an
; all-ones mask. Expected code: a vrangeps $4 written under {%k1} into %xmm2,
; an unmasked vrangeps $88 into %xmm0, and a vaddps combining them.
1451 define <4 x float>@test_int_x86_avx512_mask_range_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
1452 ; X86-LABEL: test_int_x86_avx512_mask_range_ps_128:
1454 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1455 ; X86-NEXT: vrangeps $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x50,0xd1,0x04]
1456 ; X86-NEXT: vrangeps $88, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x50,0xc1,0x58]
1457 ; X86-NEXT: vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
1458 ; X86-NEXT: retl # encoding: [0xc3]
1460 ; X64-LABEL: test_int_x86_avx512_mask_range_ps_128:
1462 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1463 ; X64-NEXT: vrangeps $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x50,0xd1,0x04]
1464 ; X64-NEXT: vrangeps $88, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x50,0xc1,0x58]
1465 ; X64-NEXT: vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
1466 ; X64-NEXT: retq # encoding: [0xc3]
; Masked form: immediate 4, mask taken from %x4.
1467 %res = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %x0, <4 x float> %x1, i32 4, <4 x float> %x3, i8 %x4)
; Unmasked form: immediate 88, all-ones mask.
1468 %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %x0, <4 x float> %x1, i32 88, <4 x float> %x3, i8 -1)
; The sum keeps both calls live so that both forms show up in the output.
1469 %res2 = fadd <4 x float> %res, %res1
1470 ret <4 x float> %res2
; Masked 256-bit range.ps intrinsic: operands are two <8 x float> sources, an
; i32 immediate, a third <8 x float> operand and an i8 mask.
1473 declare <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)
; Calls the intrinsic twice on (%x0, %x1) and adds the two results: once with
; immediate 4 under the variable mask %x4, once with immediate 88 under an
; all-ones mask. Expected code: a vrangeps $4 written under {%k1} into %ymm2,
; an unmasked vrangeps $88 into %ymm0, and a vaddps combining them.
1475 define <8 x float>@test_int_x86_avx512_mask_range_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
1476 ; X86-LABEL: test_int_x86_avx512_mask_range_ps_256:
1478 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
1479 ; X86-NEXT: vrangeps $4, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x50,0xd1,0x04]
1480 ; X86-NEXT: vrangeps $88, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x50,0xc1,0x58]
1481 ; X86-NEXT: vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
1482 ; X86-NEXT: retl # encoding: [0xc3]
1484 ; X64-LABEL: test_int_x86_avx512_mask_range_ps_256:
1486 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1487 ; X64-NEXT: vrangeps $4, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x50,0xd1,0x04]
1488 ; X64-NEXT: vrangeps $88, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x50,0xc1,0x58]
1489 ; X64-NEXT: vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
1490 ; X64-NEXT: retq # encoding: [0xc3]
; Masked form: immediate 4, mask taken from %x4.
1491 %res = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %x0, <8 x float> %x1, i32 4, <8 x float> %x3, i8 %x4)
; Unmasked form: immediate 88, all-ones mask.
1492 %res1 = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %x0, <8 x float> %x1, i32 88, <8 x float> %x3, i8 -1)
; The sum keeps both calls live so that both forms show up in the output.
1493 %res2 = fadd <8 x float> %res, %res1
1494 ret <8 x float> %res2
; 128-bit fpclass.ps intrinsic: takes a <4 x float> and an i32 immediate and
; returns a <4 x i1> per-lane result.
1497 declare <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float>, i32)
; Classifies %x0 twice (immediates 2 and 4), ANDs the two <4 x i1> results,
; widens to <8 x i1> and returns the bits as an i8. The expected assembly has
; no separate mask AND: the second vfpclassps runs under the %k1 produced by
; the first, and %k0 is then moved to %eax.
1499 define i8 @test_int_x86_avx512_fpclass_ps_128(<4 x float> %x0) {
1500 ; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_128:
1502 ; CHECK-NEXT: vfpclassps $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc8,0x04]
1503 ; CHECK-NEXT: vfpclassps $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02]
1504 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1505 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
1506 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1507 %res = call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %x0, i32 2)
1508 %res1 = call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %x0, i32 4)
1509 %1 = and <4 x i1> %res1, %res
; Shuffle indices 0-3 select the four lanes of %1; indices 4-7 select lanes of
; the zero vector, so the upper four bits of the i8 are zero.
1510 %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1511 %3 = bitcast <8 x i1> %2 to i8
; 256-bit fpclass.ps intrinsic: takes an <8 x float> and an i32 immediate and
; returns an <8 x i1> per-lane result.
1515 declare <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float>, i32)
; Classifies %x0 twice (immediates 2 and 4), ANDs the two <8 x i1> results and
; bitcasts them straight to i8 (no widening shuffle, since the result already
; has eight lanes). The expected assembly runs the second vfpclassps under the
; %k1 produced by the first, moves %k0 to %eax, and emits vzeroupper before
; returning.
1517 define i8 @test_int_x86_avx512_fpclass_ps_256(<8 x float> %x0) {
1518 ; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_256:
1520 ; CHECK-NEXT: vfpclassps $4, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc8,0x04]
1521 ; CHECK-NEXT: vfpclassps $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02]
1522 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1523 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
1524 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1525 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1526 %res = call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %x0, i32 2)
1527 %res1 = call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %x0, i32 4)
1528 %1 = and <8 x i1> %res1, %res
1529 %2 = bitcast <8 x i1> %1 to i8
; 128-bit fpclass.pd intrinsic: takes a <2 x double> and an i32 immediate and
; returns a <2 x i1> per-lane result.
1533 declare <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double>, i32)
; Classifies %x0 twice (immediates 4 and 2), ANDs the two <2 x i1> results,
; widens to <8 x i1> and returns the bits as an i8. The expected assembly runs
; the $4 vfpclasspd under the %k1 produced by the $2 one, then moves %k0 to
; %eax.
1535 define i8 @test_int_x86_avx512_fpclass_pd_128(<2 x double> %x0) {
1536 ; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_128:
1538 ; CHECK-NEXT: vfpclasspd $2, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc8,0x02]
1539 ; CHECK-NEXT: vfpclasspd $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04]
1540 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1541 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
1542 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1543 %res = call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %x0, i32 4)
1544 %res1 = call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %x0, i32 2)
1545 %1 = and <2 x i1> %res1, %res
; With two-lane inputs, shuffle indices 0-1 select the lanes of %1 and indices
; 2-3 select lanes of the zero vector, so the upper six bits of the i8 are zero.
1546 %2 = shufflevector <2 x i1> %1, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
1547 %3 = bitcast <8 x i1> %2 to i8
1551 declare <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double>, i32)
1553 define i8 @test_int_x86_avx512_fpclass_pd_256(<4 x double> %x0) {
1554 ; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_256:
1556 ; CHECK-NEXT: vfpclasspd $4, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc8,0x04]
1557 ; CHECK-NEXT: vfpclasspd $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02]
1558 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1559 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
1560 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1561 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1562 %res = call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %x0, i32 2)
1563 %res1 = call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %x0, i32 4)
1564 %1 = and <4 x i1> %res1, %res
1565 %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1566 %3 = bitcast <8 x i1> %2 to i8