1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64
; Masked lzcnt.d.128 intrinsic: two <4 x i32> operands plus an i8 mask.
5 declare <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32>, <4 x i32>, i8)
; Constant all-ones mask (i8 -1): the shared CHECK prefix expects a plain,
; unmasked vplzcntd on %xmm0 under both RUN lines.
7 define <4 x i32>@test_int_x86_avx512_vplzcnt_d_128(<4 x i32> %x0, <4 x i32> %x1) {
8 ; CHECK-LABEL: test_int_x86_avx512_vplzcnt_d_128:
10 ; CHECK-NEXT: vplzcntd %xmm0, %xmm0
11 ; CHECK-NEXT: ret{{[l|q]}}
12 %res = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
; Variable mask %x2 with %x1 as the second intrinsic operand: the checks expect
; the mask in %k1 (loaded from the stack on i686, taken from %edi on x86_64),
; a merge-masked vplzcntd into %xmm1, and a copy of %xmm1 to %xmm0.
16 define <4 x i32>@test_int_x86_avx512_mask_vplzcnt_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
17 ; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
19 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
20 ; X86-NEXT: kmovw %eax, %k1
21 ; X86-NEXT: vplzcntd %xmm0, %xmm1 {%k1}
22 ; X86-NEXT: vmovdqa %xmm1, %xmm0
25 ; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
27 ; X64-NEXT: kmovw %edi, %k1
28 ; X64-NEXT: vplzcntd %xmm0, %xmm1 {%k1}
29 ; X64-NEXT: vmovdqa %xmm1, %xmm0
31 %res = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
; Variable mask %x2 with a zeroinitializer second operand: the checks expect a
; zero-masked ({%k1} {z}) vplzcntd operating in place on %xmm0.
35 define <4 x i32>@test_int_x86_avx512_maskz_vplzcnt_d_128(<4 x i32> %x0, i8 %x2) {
36 ; X86-LABEL: test_int_x86_avx512_maskz_vplzcnt_d_128:
38 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
39 ; X86-NEXT: kmovw %eax, %k1
40 ; X86-NEXT: vplzcntd %xmm0, %xmm0 {%k1} {z}
43 ; X64-LABEL: test_int_x86_avx512_maskz_vplzcnt_d_128:
45 ; X64-NEXT: kmovw %edi, %k1
46 ; X64-NEXT: vplzcntd %xmm0, %xmm0 {%k1} {z}
48 %res = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
; Masked lzcnt.d.256 intrinsic: two <8 x i32> operands plus an i8 mask.
52 declare <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32>, <8 x i32>, i8)
; Constant all-ones mask (i8 -1): the shared CHECK prefix expects a plain,
; unmasked vplzcntd on %ymm0 under both RUN lines.
54 define <8 x i32>@test_int_x86_avx512_vplzcnt_d_256(<8 x i32> %x0, <8 x i32> %x1) {
55 ; CHECK-LABEL: test_int_x86_avx512_vplzcnt_d_256:
57 ; CHECK-NEXT: vplzcntd %ymm0, %ymm0
58 ; CHECK-NEXT: ret{{[l|q]}}
59 %res = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
; Variable mask %x2 with %x1 as the second intrinsic operand: the checks expect
; the mask in %k1 (loaded from the stack on i686, taken from %edi on x86_64),
; a merge-masked vplzcntd into %ymm1, and a copy of %ymm1 to %ymm0.
63 define <8 x i32>@test_int_x86_avx512_mask_vplzcnt_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
64 ; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
66 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
67 ; X86-NEXT: kmovw %eax, %k1
68 ; X86-NEXT: vplzcntd %ymm0, %ymm1 {%k1}
69 ; X86-NEXT: vmovdqa %ymm1, %ymm0
72 ; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
74 ; X64-NEXT: kmovw %edi, %k1
75 ; X64-NEXT: vplzcntd %ymm0, %ymm1 {%k1}
76 ; X64-NEXT: vmovdqa %ymm1, %ymm0
78 %res = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
; Masked lzcnt.q.128 intrinsic: two <2 x i64> operands plus an i8 mask.
82 declare <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64>, <2 x i64>, i8)
; Constant all-ones mask (i8 -1): the shared CHECK prefix expects a plain,
; unmasked vplzcntq on %xmm0 under both RUN lines.
84 define <2 x i64>@test_int_x86_avx512_vplzcnt_q_128(<2 x i64> %x0, <2 x i64> %x1) {
85 ; CHECK-LABEL: test_int_x86_avx512_vplzcnt_q_128:
87 ; CHECK-NEXT: vplzcntq %xmm0, %xmm0
88 ; CHECK-NEXT: ret{{[l|q]}}
89 %res = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
; Variable mask %x2 with %x1 as the second intrinsic operand: the checks expect
; the mask in %k1 (loaded from the stack on i686, taken from %edi on x86_64),
; a merge-masked vplzcntq into %xmm1, and a copy of %xmm1 to %xmm0.
93 define <2 x i64>@test_int_x86_avx512_mask_vplzcnt_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
94 ; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
96 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
97 ; X86-NEXT: kmovw %eax, %k1
98 ; X86-NEXT: vplzcntq %xmm0, %xmm1 {%k1}
99 ; X86-NEXT: vmovdqa %xmm1, %xmm0
102 ; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
104 ; X64-NEXT: kmovw %edi, %k1
105 ; X64-NEXT: vplzcntq %xmm0, %xmm1 {%k1}
106 ; X64-NEXT: vmovdqa %xmm1, %xmm0
108 %res = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
; Masked lzcnt.q.256 intrinsic: two <4 x i64> operands plus an i8 mask.
112 declare <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64>, <4 x i64>, i8)
; Constant all-ones mask (i8 -1): the shared CHECK prefix expects a plain,
; unmasked vplzcntq on %ymm0 under both RUN lines.
114 define <4 x i64>@test_int_x86_avx512_vplzcnt_q_256(<4 x i64> %x0, <4 x i64> %x1) {
115 ; CHECK-LABEL: test_int_x86_avx512_vplzcnt_q_256:
117 ; CHECK-NEXT: vplzcntq %ymm0, %ymm0
118 ; CHECK-NEXT: ret{{[l|q]}}
119 %res = call <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
; Variable mask %x2 with %x1 as the second intrinsic operand: the checks expect
; the mask in %k1 (loaded from the stack on i686, taken from %edi on x86_64),
; a merge-masked vplzcntq into %ymm1, and a copy of %ymm1 to %ymm0.
123 define <4 x i64>@test_int_x86_avx512_mask_vplzcnt_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
124 ; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
126 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
127 ; X86-NEXT: kmovw %eax, %k1
128 ; X86-NEXT: vplzcntq %ymm0, %ymm1 {%k1}
129 ; X86-NEXT: vmovdqa %ymm1, %ymm0
132 ; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
134 ; X64-NEXT: kmovw %edi, %k1
135 ; X64-NEXT: vplzcntq %ymm0, %ymm1 {%k1}
136 ; X64-NEXT: vmovdqa %ymm1, %ymm0
138 %res = call <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
; broadcastmw.256 on an i16 argument: the checks expect the value to be
; zero-extended with movzwl (from the stack on i686, from %di on x86_64) and
; then broadcast with vpbroadcastd into %ymm0.
142 define <8 x i32> @test_x86_vbroadcastmw_256(i16 %a0) {
143 ; X86-LABEL: test_x86_vbroadcastmw_256:
145 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
146 ; X86-NEXT: vpbroadcastd %eax, %ymm0
149 ; X64-LABEL: test_x86_vbroadcastmw_256:
151 ; X64-NEXT: movzwl %di, %eax
152 ; X64-NEXT: vpbroadcastd %eax, %ymm0
154 %res = call <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16 %a0) ;
; Declaration of the intrinsic called by the test above: i16 -> <8 x i32>.
157 declare <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16)
; broadcastmw.128 on an i16 argument: the checks expect the value to be
; zero-extended with movzwl (from the stack on i686, from %di on x86_64) and
; then broadcast with vpbroadcastd into %xmm0.
159 define <4 x i32> @test_x86_vbroadcastmw_128(i16 %a0) {
160 ; X86-LABEL: test_x86_vbroadcastmw_128:
162 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
163 ; X86-NEXT: vpbroadcastd %eax, %xmm0
166 ; X64-LABEL: test_x86_vbroadcastmw_128:
168 ; X64-NEXT: movzwl %di, %eax
169 ; X64-NEXT: vpbroadcastd %eax, %xmm0
171 %res = call <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16 %a0) ;
; Declaration of the intrinsic called by the test above: i16 -> <4 x i32>.
174 declare <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16)
; broadcastmb.256 on an i8 argument: the checks expect a movzbl zero-extension
; followed by vpbroadcastq into %ymm0. On i686 the value is first moved into
; %xmm0 with vmovd; on x86_64 the broadcast reads %rax directly.
176 define <4 x i64> @test_x86_broadcastmb_256(i8 %a0) {
177 ; X86-LABEL: test_x86_broadcastmb_256:
179 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
180 ; X86-NEXT: vmovd %eax, %xmm0
181 ; X86-NEXT: vpbroadcastq %xmm0, %ymm0
184 ; X64-LABEL: test_x86_broadcastmb_256:
186 ; X64-NEXT: movzbl %dil, %eax
187 ; X64-NEXT: vpbroadcastq %rax, %ymm0
189 %res = call <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8 %a0) ;
; Declaration of the intrinsic called by the test above: i8 -> <4 x i64>.
192 declare <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8)
; broadcastmb.128 on an i8 argument: the checks expect a movzbl zero-extension
; followed by vpbroadcastq into %xmm0. On i686 the value is first moved into
; %xmm0 with vmovd; on x86_64 the broadcast reads %rax directly.
194 define <2 x i64> @test_x86_broadcastmb_128(i8 %a0) {
195 ; X86-LABEL: test_x86_broadcastmb_128:
197 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
198 ; X86-NEXT: vmovd %eax, %xmm0
199 ; X86-NEXT: vpbroadcastq %xmm0, %xmm0
202 ; X64-LABEL: test_x86_broadcastmb_128:
204 ; X64-NEXT: movzbl %dil, %eax
205 ; X64-NEXT: vpbroadcastq %rax, %xmm0
207 %res = call <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8 %a0) ;
; Declaration of the intrinsic called by the test above: i8 -> <2 x i64>.
210 declare <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8)
; Masked conflict.d.128 intrinsic: two <4 x i32> operands plus an i8 mask.
212 declare <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32>, <4 x i32>, i8)
; Constant all-ones mask (i8 -1): the shared CHECK prefix expects a plain,
; unmasked vpconflictd on %xmm0 under both RUN lines.
214 define <4 x i32> @test_int_x86_avx512_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1) {
215 ; CHECK-LABEL: test_int_x86_avx512_vpconflict_d_128:
217 ; CHECK-NEXT: vpconflictd %xmm0, %xmm0
218 ; CHECK-NEXT: ret{{[l|q]}}
219 %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
; Variable mask %x2 with %x1 as the second intrinsic operand: the checks expect
; the mask in %k1 (loaded from the stack on i686, taken from %edi on x86_64),
; a merge-masked vpconflictd into %xmm1, and a copy of %xmm1 to %xmm0.
223 define <4 x i32> @test_int_x86_avx512_mask_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
224 ; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
226 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
227 ; X86-NEXT: kmovw %eax, %k1
228 ; X86-NEXT: vpconflictd %xmm0, %xmm1 {%k1}
229 ; X86-NEXT: vmovdqa %xmm1, %xmm0
232 ; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
234 ; X64-NEXT: kmovw %edi, %k1
235 ; X64-NEXT: vpconflictd %xmm0, %xmm1 {%k1}
236 ; X64-NEXT: vmovdqa %xmm1, %xmm0
238 %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
; Variable mask %x2 with a zeroinitializer second operand: the checks expect a
; zero-masked ({%k1} {z}) vpconflictd operating in place on %xmm0.
242 define <4 x i32> @test_int_x86_avx512_maskz_vpconflict_d_128(<4 x i32> %x0, i8 %x2) {
243 ; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_d_128:
245 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
246 ; X86-NEXT: kmovw %eax, %k1
247 ; X86-NEXT: vpconflictd %xmm0, %xmm0 {%k1} {z}
250 ; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_d_128:
252 ; X64-NEXT: kmovw %edi, %k1
253 ; X64-NEXT: vpconflictd %xmm0, %xmm0 {%k1} {z}
255 %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
; Masked conflict.d.256 intrinsic: two <8 x i32> operands plus an i8 mask.
259 declare <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32>, <8 x i32>, i8)
; Constant all-ones mask (i8 -1): the shared CHECK prefix expects a plain,
; unmasked vpconflictd on %ymm0 under both RUN lines.
261 define <8 x i32> @test_int_x86_avx512_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1) {
262 ; CHECK-LABEL: test_int_x86_avx512_vpconflict_d_256:
264 ; CHECK-NEXT: vpconflictd %ymm0, %ymm0
265 ; CHECK-NEXT: ret{{[l|q]}}
266 %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
; Variable mask %x2 with %x1 as the second intrinsic operand: the checks expect
; the mask in %k1 (loaded from the stack on i686, taken from %edi on x86_64),
; a merge-masked vpconflictd into %ymm1, and a copy of %ymm1 to %ymm0.
270 define <8 x i32> @test_int_x86_avx512_mask_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
271 ; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
273 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
274 ; X86-NEXT: kmovw %eax, %k1
275 ; X86-NEXT: vpconflictd %ymm0, %ymm1 {%k1}
276 ; X86-NEXT: vmovdqa %ymm1, %ymm0
279 ; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
281 ; X64-NEXT: kmovw %edi, %k1
282 ; X64-NEXT: vpconflictd %ymm0, %ymm1 {%k1}
283 ; X64-NEXT: vmovdqa %ymm1, %ymm0
285 %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
; Variable mask %x2 with a zeroinitializer second operand: the checks expect a
; zero-masked ({%k1} {z}) vpconflictd operating in place on %ymm0.
; NOTE(review): %x1 is declared in the signature but never used by the call
; below; the other maskz tests in this file take only (%x0, %x2). Dropping it
; would require regenerating the assertions to confirm nothing changes.
289 define <8 x i32> @test_int_x86_avx512_maskz_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
290 ; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_d_256:
292 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
293 ; X86-NEXT: kmovw %eax, %k1
294 ; X86-NEXT: vpconflictd %ymm0, %ymm0 {%k1} {z}
297 ; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_d_256:
299 ; X64-NEXT: kmovw %edi, %k1
300 ; X64-NEXT: vpconflictd %ymm0, %ymm0 {%k1} {z}
302 %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> zeroinitializer, i8 %x2)
; Masked conflict.q.128 intrinsic: two <2 x i64> operands plus an i8 mask.
306 declare <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64>, <2 x i64>, i8)
; Constant all-ones mask (i8 -1): the shared CHECK prefix expects a plain,
; unmasked vpconflictq on %xmm0 under both RUN lines.
308 define <2 x i64> @test_int_x86_avx512_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1) {
309 ; CHECK-LABEL: test_int_x86_avx512_vpconflict_q_128:
311 ; CHECK-NEXT: vpconflictq %xmm0, %xmm0
312 ; CHECK-NEXT: ret{{[l|q]}}
313 %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
; Variable mask %x2 with %x1 as the second intrinsic operand: the checks expect
; the mask in %k1 (loaded from the stack on i686, taken from %edi on x86_64),
; a merge-masked vpconflictq into %xmm1, and a copy of %xmm1 to %xmm0.
317 define <2 x i64> @test_int_x86_avx512_mask_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
318 ; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
320 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
321 ; X86-NEXT: kmovw %eax, %k1
322 ; X86-NEXT: vpconflictq %xmm0, %xmm1 {%k1}
323 ; X86-NEXT: vmovdqa %xmm1, %xmm0
326 ; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
328 ; X64-NEXT: kmovw %edi, %k1
329 ; X64-NEXT: vpconflictq %xmm0, %xmm1 {%k1}
330 ; X64-NEXT: vmovdqa %xmm1, %xmm0
332 %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
; Variable mask %x2 with a zeroinitializer second operand: the checks expect a
; zero-masked ({%k1} {z}) vpconflictq operating in place on %xmm0.
336 define <2 x i64> @test_int_x86_avx512_maskz_vpconflict_q_128(<2 x i64> %x0, i8 %x2) {
337 ; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_q_128:
339 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
340 ; X86-NEXT: kmovw %eax, %k1
341 ; X86-NEXT: vpconflictq %xmm0, %xmm0 {%k1} {z}
344 ; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_q_128:
346 ; X64-NEXT: kmovw %edi, %k1
347 ; X64-NEXT: vpconflictq %xmm0, %xmm0 {%k1} {z}
349 %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> zeroinitializer, i8 %x2)
; Masked conflict.q.256 intrinsic: two <4 x i64> operands plus an i8 mask.
353 declare <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64>, <4 x i64>, i8)
; Constant all-ones mask (i8 -1): the shared CHECK prefix expects a plain,
; unmasked vpconflictq on %ymm0 under both RUN lines.
355 define <4 x i64> @test_int_x86_avx512_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1) {
356 ; CHECK-LABEL: test_int_x86_avx512_vpconflict_q_256:
358 ; CHECK-NEXT: vpconflictq %ymm0, %ymm0
359 ; CHECK-NEXT: ret{{[l|q]}}
360 %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
; Variable mask %x2 with %x1 as the second intrinsic operand: the checks expect
; the mask in %k1 (loaded from the stack on i686, taken from %edi on x86_64),
; a merge-masked vpconflictq into %ymm1, and a copy of %ymm1 to %ymm0.
364 define <4 x i64> @test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
365 ; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
367 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
368 ; X86-NEXT: kmovw %eax, %k1
369 ; X86-NEXT: vpconflictq %ymm0, %ymm1 {%k1}
370 ; X86-NEXT: vmovdqa %ymm1, %ymm0
373 ; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
375 ; X64-NEXT: kmovw %edi, %k1
376 ; X64-NEXT: vpconflictq %ymm0, %ymm1 {%k1}
377 ; X64-NEXT: vmovdqa %ymm1, %ymm0
379 %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
; Variable mask %x2 with a zeroinitializer second operand: the checks expect a
; zero-masked ({%k1} {z}) vpconflictq operating in place on %ymm0.
383 define <4 x i64> @test_int_x86_avx512_maskz_vpconflict_q_256(<4 x i64> %x0, i8 %x2) {
384 ; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_q_256:
386 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
387 ; X86-NEXT: kmovw %eax, %k1
388 ; X86-NEXT: vpconflictq %ymm0, %ymm0 {%k1} {z}
391 ; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_q_256:
393 ; X64-NEXT: kmovw %edi, %k1
394 ; X64-NEXT: vpconflictq %ymm0, %ymm0 {%k1} {z}
396 %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> zeroinitializer, i8 %x2)