1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64
; Unmasked call of llvm.x86.avx512.mask.lzcnt.d.128: the mask operand is the
; constant i8 -1 (all bits set). Both targets share one set of assertions and
; expect a plain vplzcntd on %xmm0 with no mask register; %x1 is passed to the
; intrinsic but does not show up in the expected assembly.
5 declare <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32>, <4 x i32>, i8)
7 define <4 x i32>@test_int_x86_avx512_vplzcnt_d_128(<4 x i32> %x0, <4 x i32> %x1) {
8 ; CHECK-LABEL: test_int_x86_avx512_vplzcnt_d_128:
10 ; CHECK-NEXT: vplzcntd %xmm0, %xmm0
11 ; CHECK-NEXT: ret{{[l|q]}}
12 %res = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
; Masked (non-zeroing) call: %x2 is the mask and %x1 is the second intrinsic
; operand. Expected code moves the mask into %k1 (loaded from the stack with
; movzbl on i686, taken directly from %edi on x86_64), issues vplzcntd into
; %xmm1 under {%k1}, then copies %xmm1 to %xmm0.
16 define <4 x i32>@test_int_x86_avx512_mask_vplzcnt_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
17 ; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
19 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
20 ; X86-NEXT: kmovw %eax, %k1
21 ; X86-NEXT: vplzcntd %xmm0, %xmm1 {%k1}
22 ; X86-NEXT: vmovdqa %xmm1, %xmm0
25 ; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
27 ; X64-NEXT: kmovw %edi, %k1
28 ; X64-NEXT: vplzcntd %xmm0, %xmm1 {%k1}
29 ; X64-NEXT: vmovdqa %xmm1, %xmm0
31 %res = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
; Zero-masked call: the second intrinsic operand is zeroinitializer and %x2 is
; the mask. Expected code moves the mask into %k1 (via movzbl from the stack
; on i686, from %edi on x86_64) and emits vplzcntd in place on %xmm0 with
; {%k1} {z}; no extra register copy is expected.
35 define <4 x i32>@test_int_x86_avx512_maskz_vplzcnt_d_128(<4 x i32> %x0, i8 %x2) {
36 ; X86-LABEL: test_int_x86_avx512_maskz_vplzcnt_d_128:
38 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
39 ; X86-NEXT: kmovw %eax, %k1
40 ; X86-NEXT: vplzcntd %xmm0, %xmm0 {%k1} {z}
43 ; X64-LABEL: test_int_x86_avx512_maskz_vplzcnt_d_128:
45 ; X64-NEXT: kmovw %edi, %k1
46 ; X64-NEXT: vplzcntd %xmm0, %xmm0 {%k1} {z}
48 %res = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
; Unmasked call of llvm.x86.avx512.mask.lzcnt.d.256 (mask operand is the
; constant i8 -1). Both targets share the assertions and expect a plain
; vplzcntd on %ymm0 with no mask register; %x1 does not appear in the
; expected assembly.
52 declare <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32>, <8 x i32>, i8)
54 define <8 x i32>@test_int_x86_avx512_vplzcnt_d_256(<8 x i32> %x0, <8 x i32> %x1) {
55 ; CHECK-LABEL: test_int_x86_avx512_vplzcnt_d_256:
57 ; CHECK-NEXT: vplzcntd %ymm0, %ymm0
58 ; CHECK-NEXT: ret{{[l|q]}}
59 %res = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
; Masked (non-zeroing) 256-bit call: %x2 is the mask and %x1 is the second
; intrinsic operand. Expected code moves the mask into %k1 (movzbl from the
; stack on i686, %edi on x86_64), issues vplzcntd into %ymm1 under {%k1},
; then copies %ymm1 to %ymm0.
63 define <8 x i32>@test_int_x86_avx512_mask_vplzcnt_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
64 ; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
66 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
67 ; X86-NEXT: kmovw %eax, %k1
68 ; X86-NEXT: vplzcntd %ymm0, %ymm1 {%k1}
69 ; X86-NEXT: vmovdqa %ymm1, %ymm0
72 ; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
74 ; X64-NEXT: kmovw %edi, %k1
75 ; X64-NEXT: vplzcntd %ymm0, %ymm1 {%k1}
76 ; X64-NEXT: vmovdqa %ymm1, %ymm0
78 %res = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
; Unmasked call of llvm.x86.avx512.mask.lzcnt.q.128 (mask operand is the
; constant i8 -1). Both targets share the assertions and expect a plain
; vplzcntq on %xmm0 with no mask register; %x1 does not appear in the
; expected assembly.
82 declare <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64>, <2 x i64>, i8)
84 define <2 x i64>@test_int_x86_avx512_vplzcnt_q_128(<2 x i64> %x0, <2 x i64> %x1) {
85 ; CHECK-LABEL: test_int_x86_avx512_vplzcnt_q_128:
87 ; CHECK-NEXT: vplzcntq %xmm0, %xmm0
88 ; CHECK-NEXT: ret{{[l|q]}}
89 %res = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
; Masked (non-zeroing) 128-bit qword call: %x2 is the mask and %x1 is the
; second intrinsic operand. Expected code moves the mask into %k1 (movzbl
; from the stack on i686, %edi on x86_64), issues vplzcntq into %xmm1 under
; {%k1}, then copies %xmm1 to %xmm0.
93 define <2 x i64>@test_int_x86_avx512_mask_vplzcnt_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
94 ; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
96 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
97 ; X86-NEXT: kmovw %eax, %k1
98 ; X86-NEXT: vplzcntq %xmm0, %xmm1 {%k1}
99 ; X86-NEXT: vmovdqa %xmm1, %xmm0
102 ; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
104 ; X64-NEXT: kmovw %edi, %k1
105 ; X64-NEXT: vplzcntq %xmm0, %xmm1 {%k1}
106 ; X64-NEXT: vmovdqa %xmm1, %xmm0
108 %res = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
; Unmasked call of llvm.x86.avx512.mask.lzcnt.q.256 (mask operand is the
; constant i8 -1). Both targets share the assertions and expect a plain
; vplzcntq on %ymm0 with no mask register; %x1 does not appear in the
; expected assembly.
112 declare <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64>, <4 x i64>, i8)
114 define <4 x i64>@test_int_x86_avx512_vplzcnt_q_256(<4 x i64> %x0, <4 x i64> %x1) {
115 ; CHECK-LABEL: test_int_x86_avx512_vplzcnt_q_256:
117 ; CHECK-NEXT: vplzcntq %ymm0, %ymm0
118 ; CHECK-NEXT: ret{{[l|q]}}
119 %res = call <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
; Masked (non-zeroing) 256-bit qword call: %x2 is the mask and %x1 is the
; second intrinsic operand. Expected code moves the mask into %k1 (movzbl
; from the stack on i686, %edi on x86_64), issues vplzcntq into %ymm1 under
; {%k1}, then copies %ymm1 to %ymm0.
123 define <4 x i64>@test_int_x86_avx512_mask_vplzcnt_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
124 ; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
126 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
127 ; X86-NEXT: kmovw %eax, %k1
128 ; X86-NEXT: vplzcntq %ymm0, %ymm1 {%k1}
129 ; X86-NEXT: vmovdqa %ymm1, %ymm0
132 ; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
134 ; X64-NEXT: kmovw %edi, %k1
135 ; X64-NEXT: vplzcntq %ymm0, %ymm1 {%k1}
136 ; X64-NEXT: vmovdqa %ymm1, %ymm0
138 %res = call <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
; Calls llvm.x86.avx512.broadcastmw.256 on an i16 argument (the intrinsic is
; declared after this function). Expected code zero-extends the argument with
; movzwl (from the stack on i686, from %di on x86_64) and then broadcasts
; %eax into %ymm0 with vpbroadcastd; no mask register appears in the
; expected assembly.
142 define <8 x i32> @test_x86_vbroadcastmw_256(i16 %a0) {
143 ; X86-LABEL: test_x86_vbroadcastmw_256:
145 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
146 ; X86-NEXT: vpbroadcastd %eax, %ymm0
149 ; X64-LABEL: test_x86_vbroadcastmw_256:
151 ; X64-NEXT: movzwl %di, %eax
152 ; X64-NEXT: vpbroadcastd %eax, %ymm0
154 %res = call <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16 %a0) ;
157 declare <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16)
; Calls llvm.x86.avx512.broadcastmw.128 on an i16 argument (the intrinsic is
; declared after this function). Expected code zero-extends the argument with
; movzwl (from the stack on i686, from %di on x86_64) and then broadcasts
; %eax into %xmm0 with vpbroadcastd.
159 define <4 x i32> @test_x86_vbroadcastmw_128(i16 %a0) {
160 ; X86-LABEL: test_x86_vbroadcastmw_128:
162 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
163 ; X86-NEXT: vpbroadcastd %eax, %xmm0
166 ; X64-LABEL: test_x86_vbroadcastmw_128:
168 ; X64-NEXT: movzwl %di, %eax
169 ; X64-NEXT: vpbroadcastd %eax, %xmm0
171 %res = call <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16 %a0) ;
174 declare <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16)
; Calls llvm.x86.avx512.broadcastmb.256 on an i8 argument (the intrinsic is
; declared after this function). The two targets are expected to differ:
; i686 zero-extends the byte from the stack, moves it into %xmm0 with vmovd
; and broadcasts from %xmm0 with vpbroadcastq; x86_64 zero-extends %dil into
; %eax and broadcasts straight from %rax with vpbroadcastq.
176 define <4 x i64> @test_x86_broadcastmb_256(i8 %a0) {
177 ; X86-LABEL: test_x86_broadcastmb_256:
179 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
180 ; X86-NEXT: vmovd %eax, %xmm0
181 ; X86-NEXT: vpbroadcastq %xmm0, %ymm0
184 ; X64-LABEL: test_x86_broadcastmb_256:
186 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
187 ; X64-NEXT: movzbl %dil, %eax
188 ; X64-NEXT: vpbroadcastq %rax, %ymm0
190 %res = call <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8 %a0) ;
193 declare <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8)
; Calls llvm.x86.avx512.broadcastmb.128 on an i8 argument (the intrinsic is
; declared after this function). The two targets are expected to differ:
; i686 zero-extends the byte from the stack, moves it into %xmm0 with vmovd
; and broadcasts within %xmm0 with vpbroadcastq; x86_64 zero-extends %dil
; into %eax and broadcasts straight from %rax with vpbroadcastq.
195 define <2 x i64> @test_x86_broadcastmb_128(i8 %a0) {
196 ; X86-LABEL: test_x86_broadcastmb_128:
198 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
199 ; X86-NEXT: vmovd %eax, %xmm0
200 ; X86-NEXT: vpbroadcastq %xmm0, %xmm0
203 ; X64-LABEL: test_x86_broadcastmb_128:
205 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
206 ; X64-NEXT: movzbl %dil, %eax
207 ; X64-NEXT: vpbroadcastq %rax, %xmm0
209 %res = call <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8 %a0) ;
212 declare <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8)
; Unmasked call of llvm.x86.avx512.mask.conflict.d.128 (mask operand is the
; constant i8 -1). Both targets share the assertions and expect a plain
; vpconflictd on %xmm0 with no mask register; %x1 does not appear in the
; expected assembly.
214 declare <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32>, <4 x i32>, i8)
216 define <4 x i32> @test_int_x86_avx512_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1) {
217 ; CHECK-LABEL: test_int_x86_avx512_vpconflict_d_128:
219 ; CHECK-NEXT: vpconflictd %xmm0, %xmm0
220 ; CHECK-NEXT: ret{{[l|q]}}
221 %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
; Masked (non-zeroing) call: %x2 is the mask and %x1 is the second intrinsic
; operand. Expected code moves the mask into %k1 (movzbl from the stack on
; i686, %edi on x86_64), issues vpconflictd into %xmm1 under {%k1}, then
; copies %xmm1 to %xmm0.
225 define <4 x i32> @test_int_x86_avx512_mask_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
226 ; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
228 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
229 ; X86-NEXT: kmovw %eax, %k1
230 ; X86-NEXT: vpconflictd %xmm0, %xmm1 {%k1}
231 ; X86-NEXT: vmovdqa %xmm1, %xmm0
234 ; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
236 ; X64-NEXT: kmovw %edi, %k1
237 ; X64-NEXT: vpconflictd %xmm0, %xmm1 {%k1}
238 ; X64-NEXT: vmovdqa %xmm1, %xmm0
240 %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
; Zero-masked call: the second intrinsic operand is zeroinitializer and %x2 is
; the mask. Expected code moves the mask into %k1 (movzbl from the stack on
; i686, %edi on x86_64) and emits vpconflictd in place on %xmm0 with
; {%k1} {z}.
244 define <4 x i32> @test_int_x86_avx512_maskz_vpconflict_d_128(<4 x i32> %x0, i8 %x2) {
245 ; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_d_128:
247 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
248 ; X86-NEXT: kmovw %eax, %k1
249 ; X86-NEXT: vpconflictd %xmm0, %xmm0 {%k1} {z}
252 ; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_d_128:
254 ; X64-NEXT: kmovw %edi, %k1
255 ; X64-NEXT: vpconflictd %xmm0, %xmm0 {%k1} {z}
257 %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
; Unmasked call of llvm.x86.avx512.mask.conflict.d.256 (mask operand is the
; constant i8 -1). Both targets share the assertions and expect a plain
; vpconflictd on %ymm0 with no mask register; %x1 does not appear in the
; expected assembly.
261 declare <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32>, <8 x i32>, i8)
263 define <8 x i32> @test_int_x86_avx512_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1) {
264 ; CHECK-LABEL: test_int_x86_avx512_vpconflict_d_256:
266 ; CHECK-NEXT: vpconflictd %ymm0, %ymm0
267 ; CHECK-NEXT: ret{{[l|q]}}
268 %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
; Masked (non-zeroing) 256-bit call: %x2 is the mask and %x1 is the second
; intrinsic operand. Expected code moves the mask into %k1 (movzbl from the
; stack on i686, %edi on x86_64), issues vpconflictd into %ymm1 under {%k1},
; then copies %ymm1 to %ymm0.
272 define <8 x i32> @test_int_x86_avx512_mask_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
273 ; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
275 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
276 ; X86-NEXT: kmovw %eax, %k1
277 ; X86-NEXT: vpconflictd %ymm0, %ymm1 {%k1}
278 ; X86-NEXT: vmovdqa %ymm1, %ymm0
281 ; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
283 ; X64-NEXT: kmovw %edi, %k1
284 ; X64-NEXT: vpconflictd %ymm0, %ymm1 {%k1}
285 ; X64-NEXT: vmovdqa %ymm1, %ymm0
287 %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
; Zero-masked 256-bit call: the second intrinsic operand is zeroinitializer
; and %x2 is the mask. Expected code moves the mask into %k1 (movzbl from the
; stack on i686, %edi on x86_64) and emits vpconflictd in place on %ymm0 with
; {%k1} {z}.
; NOTE(review): %x1 is declared in the signature but never used (the call
; passes zeroinitializer instead); the other maskz tests take only
; (%x0, %x2). Confirm whether the extra parameter is intentional.
291 define <8 x i32> @test_int_x86_avx512_maskz_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
292 ; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_d_256:
294 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
295 ; X86-NEXT: kmovw %eax, %k1
296 ; X86-NEXT: vpconflictd %ymm0, %ymm0 {%k1} {z}
299 ; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_d_256:
301 ; X64-NEXT: kmovw %edi, %k1
302 ; X64-NEXT: vpconflictd %ymm0, %ymm0 {%k1} {z}
304 %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> zeroinitializer, i8 %x2)
; Unmasked call of llvm.x86.avx512.mask.conflict.q.128 (mask operand is the
; constant i8 -1). Both targets share the assertions and expect a plain
; vpconflictq on %xmm0 with no mask register; %x1 does not appear in the
; expected assembly.
308 declare <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64>, <2 x i64>, i8)
310 define <2 x i64> @test_int_x86_avx512_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1) {
311 ; CHECK-LABEL: test_int_x86_avx512_vpconflict_q_128:
313 ; CHECK-NEXT: vpconflictq %xmm0, %xmm0
314 ; CHECK-NEXT: ret{{[l|q]}}
315 %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
; Masked (non-zeroing) 128-bit qword call: %x2 is the mask and %x1 is the
; second intrinsic operand. Expected code moves the mask into %k1 (movzbl
; from the stack on i686, %edi on x86_64), issues vpconflictq into %xmm1
; under {%k1}, then copies %xmm1 to %xmm0.
319 define <2 x i64> @test_int_x86_avx512_mask_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
320 ; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
322 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
323 ; X86-NEXT: kmovw %eax, %k1
324 ; X86-NEXT: vpconflictq %xmm0, %xmm1 {%k1}
325 ; X86-NEXT: vmovdqa %xmm1, %xmm0
328 ; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
330 ; X64-NEXT: kmovw %edi, %k1
331 ; X64-NEXT: vpconflictq %xmm0, %xmm1 {%k1}
332 ; X64-NEXT: vmovdqa %xmm1, %xmm0
334 %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
; Zero-masked 128-bit qword call: the second intrinsic operand is
; zeroinitializer and %x2 is the mask. Expected code moves the mask into %k1
; (movzbl from the stack on i686, %edi on x86_64) and emits vpconflictq in
; place on %xmm0 with {%k1} {z}.
338 define <2 x i64> @test_int_x86_avx512_maskz_vpconflict_q_128(<2 x i64> %x0, i8 %x2) {
339 ; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_q_128:
341 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
342 ; X86-NEXT: kmovw %eax, %k1
343 ; X86-NEXT: vpconflictq %xmm0, %xmm0 {%k1} {z}
346 ; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_q_128:
348 ; X64-NEXT: kmovw %edi, %k1
349 ; X64-NEXT: vpconflictq %xmm0, %xmm0 {%k1} {z}
351 %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> zeroinitializer, i8 %x2)
; Unmasked call of llvm.x86.avx512.mask.conflict.q.256 (mask operand is the
; constant i8 -1). Both targets share the assertions and expect a plain
; vpconflictq on %ymm0 with no mask register; %x1 does not appear in the
; expected assembly.
355 declare <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64>, <4 x i64>, i8)
357 define <4 x i64> @test_int_x86_avx512_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1) {
358 ; CHECK-LABEL: test_int_x86_avx512_vpconflict_q_256:
360 ; CHECK-NEXT: vpconflictq %ymm0, %ymm0
361 ; CHECK-NEXT: ret{{[l|q]}}
362 %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
; Masked (non-zeroing) 256-bit qword call: %x2 is the mask and %x1 is the
; second intrinsic operand. Expected code moves the mask into %k1 (movzbl
; from the stack on i686, %edi on x86_64), issues vpconflictq into %ymm1
; under {%k1}, then copies %ymm1 to %ymm0.
366 define <4 x i64> @test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
367 ; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
369 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
370 ; X86-NEXT: kmovw %eax, %k1
371 ; X86-NEXT: vpconflictq %ymm0, %ymm1 {%k1}
372 ; X86-NEXT: vmovdqa %ymm1, %ymm0
375 ; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
377 ; X64-NEXT: kmovw %edi, %k1
378 ; X64-NEXT: vpconflictq %ymm0, %ymm1 {%k1}
379 ; X64-NEXT: vmovdqa %ymm1, %ymm0
381 %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
; Zero-masked 256-bit qword call: the second intrinsic operand is
; zeroinitializer and %x2 is the mask. Expected code moves the mask into %k1
; (movzbl from the stack on i686, %edi on x86_64) and emits vpconflictq in
; place on %ymm0 with {%k1} {z}.
385 define <4 x i64> @test_int_x86_avx512_maskz_vpconflict_q_256(<4 x i64> %x0, i8 %x2) {
386 ; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_q_256:
388 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
389 ; X86-NEXT: kmovw %eax, %k1
390 ; X86-NEXT: vpconflictq %ymm0, %ymm0 {%k1} {z}
393 ; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_q_256:
395 ; X64-NEXT: kmovw %edi, %k1
396 ; X64-NEXT: vpconflictq %ymm0, %ymm0 {%k1} {z}
398 %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> zeroinitializer, i8 %x2)