; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
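
; This file exercises the AVX512DQ+VL masked intrinsics that convert between
; 64-bit integer vectors and floating-point vectors (cvt[t]{pd,ps}2[u]qq and
; cvt[u]qq2ps) at 128-bit and 256-bit widths, checking exact instruction
; encodings on both 32-bit and 64-bit targets.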

declare <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7b,0xc8]
; X86-NEXT:    vcvtpd2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x7b,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7b,0xc8]
; X64-NEXT:    vcvtpd2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x7b,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}
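
; Each masked-intrinsic test below calls the intrinsic once with the live mask
; and once with an all-ones mask, then combines the two results so that both
; the {%k1}-masked and the unmasked instruction encodings are checked.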

declare <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7b,0xc8]
; X86-NEXT:    vcvtpd2qq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x7b,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7b,0xc8]
; X64-NEXT:    vcvtpd2qq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x7b,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x79,0xc8]
; X86-NEXT:    vcvtpd2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x79,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x79,0xc8]
; X64-NEXT:    vcvtpd2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x79,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x79,0xc8]
; X86-NEXT:    vcvtpd2uqq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x79,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x79,0xc8]
; X64-NEXT:    vcvtpd2uqq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x79,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc8]
; X86-NEXT:    vcvtps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc8]
; X64-NEXT:    vcvtps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}
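
; The following tests check that a <2 x float> load widened to <4 x float> by
; a shufflevector is folded into the memory form of vcvtps2qq, in unmasked,
; merge-masked, and zero-masked variants.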

define <2 x i64> @test_int_x86_avx512_cvt_ps2qq_128_load(<2 x float>* %p) {
; X86-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vcvtps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load:
; X64:       # %bb.0:
; X64-NEXT:    vcvtps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2qq_128_load(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvtps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvtps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2qq_128_load(<2 x float>* %p, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvtps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvtps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}
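
; The _2 variants repeat the same pattern but widen the loaded <2 x float>
; with undef rather than zeroinitializer in the shufflevector.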

define <2 x i64> @test_int_x86_avx512_cvt_ps2qq_128_load_2(<2 x float>* %p) {
; X86-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vcvtps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load_2:
; X64:       # %bb.0:
; X64-NEXT:    vcvtps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2qq_128_load_2(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvtps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load_2:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvtps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2qq_128_load_2(<2 x float>* %p, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvtps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load_2:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvtps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}
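
; The _3 variants load a full <4 x float> directly, with no widening shuffle.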

define <2 x i64> @test_int_x86_avx512_cvt_ps2qq_128_load_3(<4 x float>* %p) {
; X86-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load_3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vcvtps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load_3:
; X64:       # %bb.0:
; X64-NEXT:    vcvtps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %p
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> undef, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2qq_128_load_3(<4 x float>* %p, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load_3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvtps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load_3:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvtps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %p
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> %passthru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2qq_128_load_3(<4 x float>* %p, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load_3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvtps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load_3:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvtps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %p
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7b,0xc8]
; X86-NEXT:    vcvtps2qq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x7b,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7b,0xc8]
; X64-NEXT:    vcvtps2qq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x7b,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc8]
; X86-NEXT:    vcvtps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc8]
; X64-NEXT:    vcvtps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}
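
; The same three load-folding patterns (zero-widened, undef-widened, and full
; <4 x float> load) are repeated below for the unsigned conversion vcvtps2uqq.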

define <2 x i64> @test_int_x86_avx512_cvt_ps2uqq_128_load(<2 x float>* %p) {
; X86-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vcvtps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load:
; X64:       # %bb.0:
; X64-NEXT:    vcvtps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2uqq_128_load(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvtps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvtps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2uqq_128_load(<2 x float>* %p, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvtps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvtps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_cvt_ps2uqq_128_load_2(<2 x float>* %p) {
; X86-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vcvtps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load_2:
; X64:       # %bb.0:
; X64-NEXT:    vcvtps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2uqq_128_load_2(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvtps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load_2:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvtps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_2(<2 x float>* %p, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvtps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_2:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvtps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_cvt_ps2uqq_128_load_3(<4 x float>* %p) {
; X86-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load_3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vcvtps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load_3:
; X64:       # %bb.0:
; X64-NEXT:    vcvtps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %p
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> undef, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2uqq_128_load_3(<4 x float>* %p, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load_3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvtps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load_3:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvtps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %p
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> %passthru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_3(<4 x float>* %p, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvtps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_3:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvtps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %p
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x79,0xc8]
; X86-NEXT:    vcvtps2uqq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x79,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x79,0xc8]
; X64-NEXT:    vcvtps2uqq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x79,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
; X86-NEXT:    vcvtqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
; X64-NEXT:    vcvtqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}
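
; The _zext variant shuffles the intrinsic result with zero in the upper two
; lanes; since the 128-bit qq2ps conversion already zeroes those lanes, the
; shuffle should fold away and the generated code should match the test above.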

define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_128_zext(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128_zext:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
; X86-NEXT:    vcvtqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128_zext:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
; X64-NEXT:    vcvtqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
  %res1 = shufflevector <4 x float> %res, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %res2 = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
  %res3 = shufflevector <4 x float> %res2, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %res4 = fadd <4 x float> %res1, %res3
  ret <4 x float> %res4
}
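
; The 256-bit qq2ps test is written as generic sitofp IR with a <4 x i1> mask
; select rather than an intrinsic call; the vzeroupper emitted before the
; return is also checked.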

define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256:
; X86:       # %bb.0:
; X86-NEXT:    vcvtqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xd0]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8]
; X86-NEXT:    vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256:
; X64:       # %bb.0:
; X64-NEXT:    vcvtqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8]
; X64-NEXT:    vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %cvt1 = sitofp <4 x i64> %x0 to <4 x float>
  %1 = bitcast i8 %x2 to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract, <4 x float> %cvt1, <4 x float> %x1
  %cvt = sitofp <4 x i64> %x0 to <4 x float>
  %res2 = fadd <4 x float> %2, %cvt
  ret <4 x float> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7a,0xc8]
; X86-NEXT:    vcvttpd2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x7a,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7a,0xc8]
; X64-NEXT:    vcvttpd2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x7a,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7a,0xc8]
; X86-NEXT:    vcvttpd2qq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x7a,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7a,0xc8]
; X64-NEXT:    vcvttpd2qq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x7a,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x78,0xc8]
; X86-NEXT:    vcvttpd2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x78,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x78,0xc8]
; X64-NEXT:    vcvttpd2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x78,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x78,0xc8]
; X86-NEXT:    vcvttpd2uqq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x78,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x78,0xc8]
; X64-NEXT:    vcvttpd2uqq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x78,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc8]
; X86-NEXT:    vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc8]
; X64-NEXT:    vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}
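
; The truncating conversion vcvttps2qq gets the same load-folding matrix as
; vcvtps2qq above: zero-widened, undef-widened, and full <4 x float> loads.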

define <2 x i64> @test_int_x86_avx512_cvtt_ps2qq_128_load(<2 x float>* %p) {
; X86-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load:
; X64:       # %bb.0:
; X64-NEXT:    vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2qq_128_load(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2qq_128_load(<2 x float>* %p, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_cvtt_ps2qq_128_load_2(<2 x float>* %p) {
; X86-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load_2:
; X64:       # %bb.0:
; X64-NEXT:    vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2(<2 x float>* %p, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_cvtt_ps2qq_128_load_3(<4 x float>* %p) {
; X86-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load_3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load_3:
; X64:       # %bb.0:
; X64-NEXT:    vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %p
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> undef, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2qq_128_load_3(<4 x float>* %p, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_3:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %p
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %passthru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_3(<4 x float>* %p, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_3:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %p
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7a,0xc8]
; X86-NEXT:    vcvttps2qq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x7a,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7a,0xc8]
; X64-NEXT:    vcvttps2qq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x7a,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
; X86-NEXT:    vcvtuqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
; X64-NEXT:    vcvtuqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}
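
; As with qq2ps above, the _zext variant verifies that shuffling zeroes into
; the upper two lanes of the 128-bit result folds away.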

define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_128_zext(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128_zext:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
; X86-NEXT:    vcvtuqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128_zext:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
; X64-NEXT:    vcvtuqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
  %res1 = shufflevector <4 x float> %res, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %res2 = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
  %res3 = shufflevector <4 x float> %res2, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %res4 = fadd <4 x float> %res1, %res3
  ret <4 x float> %res4
}
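
; Like the signed 256-bit case, the unsigned 256-bit conversion is written as
; generic uitofp IR with a mask select rather than an intrinsic call.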

declare <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
; X86:       # %bb.0:
; X86-NEXT:    vcvtuqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xff,0x28,0x7a,0xd0]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtuqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8]
; X86-NEXT:    vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
; X64:       # %bb.0:
; X64-NEXT:    vcvtuqq2ps %ymm0, %xmm2 # encoding: [0x62,0xf1,0xff,0x28,0x7a,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtuqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8]
; X64-NEXT:    vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %cvt1 = uitofp <4 x i64> %x0 to <4 x float>
  %1 = bitcast i8 %x2 to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract, <4 x float> %cvt1, <4 x float> %x1
  %cvt = uitofp <4 x i64> %x0 to <4 x float>
  %res2 = fadd <4 x float> %2, %cvt
  ret <4 x float> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc8]
; X86-NEXT:    vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc8]
; X64-NEXT:    vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}
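
; The unsigned truncating conversion vcvttps2uqq gets the same load-folding
; coverage below.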

define <2 x i64> @test_int_x86_avx512_cvtt_ps2uqq_128_load(<2 x float>* %p) {
; X86-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load:
; X64:       # %bb.0:
; X64-NEXT:    vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2uqq_128_load(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load(<2 x float>* %p, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_cvtt_ps2uqq_128_load_2(<2 x float>* %p) {
; X86-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load_2:
; X64:       # %bb.0:
; X64-NEXT:    vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask)
  ret <2 x i64> %res
}
define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2(<2 x float>* %p, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <2 x float>, <2 x float>* %p
%x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask)
ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_cvtt_ps2uqq_128_load_3(<4 x float>* %p) {
; X86-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load_3:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load_3:
; X64: # %bb.0:
; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <4 x float>, <4 x float>* %p
%res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> undef, i8 -1)
ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_3(<4 x float>* %p, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_3:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_3:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <4 x float>, <4 x float>* %p
%res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %passthru, i8 %mask)
ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_3(<4 x float>* %p, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_3:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_3:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07]
; X64-NEXT: retq # encoding: [0xc3]
%x0 = load <4 x float>, <4 x float>* %p
%res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask)
ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x78,0xc8]
; X86-NEXT: vcvttps2uqq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x78,0xc0]
; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvttps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x78,0xc8]
; X64-NEXT: vcvttps2uqq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x78,0xc0]
; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
%res2 = add <4 x i64> %res, %res1
ret <4 x i64> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double>, i32, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_reduce_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_reduce_pd_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vreducepd $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x56,0xc8,0x04]
; X86-NEXT: vreducepd $8, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x56,0xc0,0x08]
; X86-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_reduce_pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vreducepd $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x56,0xc8,0x04]
; X64-NEXT: vreducepd $8, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x56,0xc0,0x08]
; X64-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3)
%res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %x0, i32 8, <2 x double> %x2, i8 -1)
%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_reduce_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_reduce_pd_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vreducepd $4, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x56,0xc8,0x04]
; X86-NEXT: vreducepd $0, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x56,0xc0,0x00]
; X86-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_reduce_pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vreducepd $4, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x56,0xc8,0x04]
; X64-NEXT: vreducepd $0, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x56,0xc0,0x00]
; X64-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3)
%res1 = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %x0, i32 0, <4 x double> %x2, i8 -1)
%res2 = fadd <4 x double> %res, %res1
ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float>, i32, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_reduce_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_reduce_ps_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vreduceps $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x56,0xc8,0x04]
; X86-NEXT: vreduceps $88, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x56,0xc0,0x58]
; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_reduce_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vreduceps $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x56,0xc8,0x04]
; X64-NEXT: vreduceps $88, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x56,0xc0,0x58]
; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 %x3)
%res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 -1)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_reduce_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_reduce_ps_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vreduceps $11, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x56,0xc8,0x0b]
; X86-NEXT: vreduceps $11, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x56,0xc0,0x0b]
; X86-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_reduce_ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vreduceps $11, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x56,0xc8,0x0b]
; X64-NEXT: vreduceps $11, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x56,0xc0,0x0b]
; X64-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 %x3)
%res1 = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 -1)
%res2 = fadd <8 x float> %res, %res1
ret <8 x float> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_range_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_range_pd_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vrangepd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x50,0xd1,0x04]
; X86-NEXT: vrangepd $8, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x50,0xc1,0x08]
; X86-NEXT: vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_range_pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vrangepd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x50,0xd1,0x04]
; X64-NEXT: vrangepd $8, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x50,0xc1,0x08]
; X64-NEXT: vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %x0, <2 x double> %x1, i32 4, <2 x double> %x3, i8 %x4)
%res1 = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %x0, <2 x double> %x1, i32 8, <2 x double> %x3, i8 -1)
%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_range_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_range_pd_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vrangepd $4, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x50,0xd1,0x04]
; X86-NEXT: vrangepd $88, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x50,0xc1,0x58]
; X86-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_range_pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vrangepd $4, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x50,0xd1,0x04]
; X64-NEXT: vrangepd $88, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x50,0xc1,0x58]
; X64-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %x0, <4 x double> %x1, i32 4, <4 x double> %x3, i8 %x4)
%res1 = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %x0, <4 x double> %x1, i32 88, <4 x double> %x3, i8 -1)
%res2 = fadd <4 x double> %res, %res1
ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_range_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_range_ps_128:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vrangeps $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x50,0xd1,0x04]
; X86-NEXT: vrangeps $88, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x50,0xc1,0x58]
; X86-NEXT: vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_range_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vrangeps $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x50,0xd1,0x04]
; X64-NEXT: vrangeps $88, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x50,0xc1,0x58]
; X64-NEXT: vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %x0, <4 x float> %x1, i32 4, <4 x float> %x3, i8 %x4)
%res1 = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %x0, <4 x float> %x1, i32 88, <4 x float> %x3, i8 -1)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_range_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_range_ps_256:
; X86: # %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vrangeps $4, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x50,0xd1,0x04]
; X86-NEXT: vrangeps $88, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x50,0xc1,0x58]
; X86-NEXT: vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_range_ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vrangeps $4, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x50,0xd1,0x04]
; X64-NEXT: vrangeps $88, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x50,0xc1,0x58]
; X64-NEXT: vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %x0, <8 x float> %x1, i32 4, <8 x float> %x3, i8 %x4)
%res1 = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %x0, <8 x float> %x1, i32 88, <8 x float> %x3, i8 -1)
%res2 = fadd <8 x float> %res, %res1
ret <8 x float> %res2
}

declare <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float>, i32)

define i8 @test_int_x86_avx512_fpclass_ps_128(<4 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclassps $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc8,0x04]
; CHECK-NEXT: vfpclassps $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %x0, i32 2)
%res1 = call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %x0, i32 4)
%1 = and <4 x i1> %res1, %res
%2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%3 = bitcast <8 x i1> %2 to i8
ret i8 %3
}

declare <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float>, i32)

define i8 @test_int_x86_avx512_fpclass_ps_256(<8 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclassps $4, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc8,0x04]
; CHECK-NEXT: vfpclassps $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %x0, i32 2)
%res1 = call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %x0, i32 4)
%1 = and <8 x i1> %res1, %res
%2 = bitcast <8 x i1> %1 to i8
ret i8 %2
}

declare <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double>, i32)

define i8 @test_int_x86_avx512_fpclass_pd_128(<2 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclasspd $2, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc8,0x02]
; CHECK-NEXT: vfpclasspd $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %x0, i32 4)
%res1 = call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %x0, i32 2)
%1 = and <2 x i1> %res1, %res
%2 = shufflevector <2 x i1> %1, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%3 = bitcast <8 x i1> %2 to i8
ret i8 %3
}

declare <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double>, i32)

define i8 @test_int_x86_avx512_fpclass_pd_256(<4 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclasspd $4, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc8,0x04]
; CHECK-NEXT: vfpclasspd $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %x0, i32 2)
%res1 = call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %x0, i32 4)
%1 = and <4 x i1> %res1, %res
%2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%3 = bitcast <8 x i1> %2 to i8