1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64
5 define <4 x i32> @test_int_x86_avx512_mask_vplzcnt_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
6 ; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
8 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
9 ; X86-NEXT: kmovw %eax, %k1
10 ; X86-NEXT: vplzcntd %xmm0, %xmm1 {%k1}
11 ; X86-NEXT: vmovdqa %xmm1, %xmm0
14 ; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
16 ; X64-NEXT: kmovw %edi, %k1
17 ; X64-NEXT: vplzcntd %xmm0, %xmm1 {%k1}
18 ; X64-NEXT: vmovdqa %xmm1, %xmm0
20 %1 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x0, i1 false)
21 %2 = bitcast i8 %x2 to <8 x i1>
22 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
23 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x1
; Zero-masked lzcnt.d.128: masked-off lanes are zeroed ({z} form).
define <4 x i32> @test_int_x86_avx512_maskz_vplzcnt_d_128(<4 x i32> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vplzcnt_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntd %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vplzcnt_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntd %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x0, i1 false)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> zeroinitializer
  ret <4 x i32> %3
}
46 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) #0
; Merge-masked lzcnt.d.256: all 8 mask bits used directly, no shuffle extract needed.
define <8 x i32> @test_int_x86_avx512_mask_vplzcnt_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntd %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntd %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %x0, i1 false)
  %2 = bitcast i8 %x2 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x1
  ret <8 x i32> %3
}
68 declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1) #0
; Merge-masked lzcnt.q.128: low 2 bits of %x2 select ctlz(%x0) lanes, else passthru %x1.
define <2 x i64> @test_int_x86_avx512_mask_vplzcnt_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntq %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntq %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %x0, i1 false)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x1
  ret <2 x i64> %3
}
91 declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) #0
; Merge-masked lzcnt.q.256: low 4 bits of %x2 select ctlz(%x0) lanes, else passthru %x1.
define <4 x i64> @test_int_x86_avx512_mask_vplzcnt_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntq %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntq %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %x0, i1 false)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x1
  ret <4 x i64> %3
}
114 declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) #0
; Unmasked vpconflict.d.128: plain intrinsic call; %x1 is intentionally unused.
define <4 x i32> @test_int_x86_avx512_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vpconflict_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpconflictd %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = call <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32> %x0)
  ret <4 x i32> %1
}
; Merge-masked vpconflict.d.128: low 4 bits of %x2 select conflict(%x0) lanes, else passthru %x1.
define <4 x i32> @test_int_x86_avx512_mask_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictd %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictd %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32> %x0)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x1
  ret <4 x i32> %3
}
; Zero-masked vpconflict.d.128: masked-off lanes are zeroed ({z} form).
define <4 x i32> @test_int_x86_avx512_maskz_vpconflict_d_128(<4 x i32> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictd %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictd %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32> %x0)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> zeroinitializer
  ret <4 x i32> %3
}
; Unmasked vpconflict.d.256: plain intrinsic call; %x1 is intentionally unused.
define <8 x i32> @test_int_x86_avx512_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vpconflict_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpconflictd %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = call <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32> %x0)
  ret <8 x i32> %1
}
; Merge-masked vpconflict.d.256: all 8 mask bits used directly, no shuffle extract needed.
define <8 x i32> @test_int_x86_avx512_mask_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictd %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictd %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = call <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32> %x0)
  %2 = bitcast i8 %x2 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x1
  ret <8 x i32> %3
}
; Zero-masked vpconflict.d.256: masked-off lanes are zeroed ({z} form).
define <8 x i32> @test_int_x86_avx512_maskz_vpconflict_d_256(<8 x i32> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictd %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictd %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
  %1 = call <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32> %x0)
  %2 = bitcast i8 %x2 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer
  ret <8 x i32> %3
}
; Unmasked vpconflict.q.128: plain intrinsic call; %x1 is intentionally unused.
define <2 x i64> @test_int_x86_avx512_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vpconflict_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpconflictq %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = call <2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64> %x0)
  ret <2 x i64> %1
}
; Merge-masked vpconflict.q.128: low 2 bits of %x2 select conflict(%x0) lanes, else passthru %x1.
define <2 x i64> @test_int_x86_avx512_mask_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictq %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictq %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64> %x0)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x1
  ret <2 x i64> %3
}
; Zero-masked vpconflict.q.128: masked-off lanes are zeroed ({z} form).
define <2 x i64> @test_int_x86_avx512_maskz_vpconflict_q_128(<2 x i64> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictq %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictq %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
  %1 = call <2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64> %x0)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> zeroinitializer
  ret <2 x i64> %3
}
; Unmasked vpconflict.q.256: plain intrinsic call; %x1 is intentionally unused.
define <4 x i64> @test_int_x86_avx512_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vpconflict_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpconflictq %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = call <4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64> %x0)
  ret <4 x i64> %1
}
; Merge-masked vpconflict.q.256: low 4 bits of %x2 select conflict(%x0) lanes, else passthru %x1.
define <4 x i64> @test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictq %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictq %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = call <4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64> %x0)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x1
  ret <4 x i64> %3
}
; Zero-masked vpconflict.q.256: masked-off lanes are zeroed ({z} form).
define <4 x i64> @test_int_x86_avx512_maskz_vpconflict_q_256(<4 x i64> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictq %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictq %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
  %1 = call <4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64> %x0)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> zeroinitializer
  ret <4 x i64> %3
}
318 declare <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32>)
319 declare <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32>)
320 declare <2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64>)
321 declare <4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64>)