1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -O3 -x86-asm-syntax=intel -mtriple=x86_64 -mcpu=skylake-avx512 < %s | FileCheck %s
4 declare <7 x i1> @llvm.get.active.lane.mask.v7i1.i64(i64, i64)
5 declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64, i64)
6 declare <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64, i64)
7 declare <64 x i1> @llvm.get.active.lane.mask.v64i1.i64(i64, i64)
8 declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)
9 declare <64 x i1> @llvm.get.active.lane.mask.v64i1.i32(i32, i32)
; Lowers @llvm.get.active.lane.mask.v7i1.i64 with base 0 and trip count %0.
; The illegal <7 x i1> result is returned indirectly (pointer arrives in rdi,
; echoed back in rax; the packed byte is stored to [rax] at the end).
; Expected codegen: broadcast the trip count into zmm0, compare unsigned
; (vpcmpnleuq) against a constant-pool vector (presumably the lane indices
; 0..7 -- the pool contents are not shown here; confirm against the .rodata
; in the actual output), then extract each of the 7 mask bits with
; kshiftrb/kmovd and re-pack them into a single byte with and/shl/or.
; The final "and cl, 127" discards the unused 8th lane of the 8-wide compare.
; NOTE(review): assertions are autogenerated -- regenerate with
; utils/update_llc_test_checks.py rather than editing by hand.
11 define <7 x i1> @create_mask7(i64 %0) {
12 ; CHECK-LABEL: create_mask7:
14 ; CHECK-NEXT: mov rax, rdi
15 ; CHECK-NEXT: vpbroadcastq zmm0, rsi
16 ; CHECK-NEXT: vpcmpnleuq k0, zmm0, zmmword ptr [rip + {{\.?LCPI[0-9]+_[0-9]+}}]
17 ; CHECK-NEXT: kshiftrb k1, k0, 6
18 ; CHECK-NEXT: kmovd ecx, k1
19 ; CHECK-NEXT: kshiftrb k1, k0, 5
20 ; CHECK-NEXT: kmovd edx, k1
21 ; CHECK-NEXT: kshiftrb k1, k0, 4
22 ; CHECK-NEXT: kmovd esi, k1
23 ; CHECK-NEXT: kshiftrb k1, k0, 3
24 ; CHECK-NEXT: kmovd edi, k1
25 ; CHECK-NEXT: kshiftrb k1, k0, 2
26 ; CHECK-NEXT: kmovd r8d, k1
27 ; CHECK-NEXT: kshiftrb k1, k0, 1
28 ; CHECK-NEXT: kmovd r9d, k1
29 ; CHECK-NEXT: kmovd r10d, k0
30 ; CHECK-NEXT: and r10b, 1
31 ; CHECK-NEXT: and r9b, 1
32 ; CHECK-NEXT: add r9b, r9b
33 ; CHECK-NEXT: or r9b, r10b
34 ; CHECK-NEXT: and r8b, 1
35 ; CHECK-NEXT: shl r8b, 2
36 ; CHECK-NEXT: or r8b, r9b
37 ; CHECK-NEXT: and dil, 1
38 ; CHECK-NEXT: shl dil, 3
39 ; CHECK-NEXT: or dil, r8b
40 ; CHECK-NEXT: and sil, 1
41 ; CHECK-NEXT: shl sil, 4
42 ; CHECK-NEXT: or sil, dil
43 ; CHECK-NEXT: and dl, 1
44 ; CHECK-NEXT: shl dl, 5
45 ; CHECK-NEXT: or dl, sil
46 ; CHECK-NEXT: shl cl, 6
47 ; CHECK-NEXT: or cl, dl
48 ; CHECK-NEXT: and cl, 127
49 ; CHECK-NEXT: mov byte ptr [rax], cl
50 ; CHECK-NEXT: vzeroupper
52 %2 = call <7 x i1> @llvm.get.active.lane.mask.v7i1.i64(i64 0, i64 %0)
; Lowers @llvm.get.active.lane.mask.v16i1.i64 with base 0.
; With i64 lane indices only 8 fit per zmm, so two vpcmpnleuq compares are
; emitted against two constant-pool index vectors and their 8-bit mask
; results are concatenated with kunpckbw; the 16-bit mask is then
; materialized into xmm0 as bytes via vpmovm2b.
56 define <16 x i1> @create_mask16(i64 %0) {
57 ; CHECK-LABEL: create_mask16:
59 ; CHECK-NEXT: vpbroadcastq zmm0, rdi
60 ; CHECK-NEXT: vpcmpnleuq k0, zmm0, zmmword ptr [rip + {{\.?LCPI[0-9]+_[0-9]+}}]
61 ; CHECK-NEXT: vpcmpnleuq k1, zmm0, zmmword ptr [rip + {{\.?LCPI[0-9]+_[0-9]+}}]
62 ; CHECK-NEXT: kunpckbw k0, k1, k0
63 ; CHECK-NEXT: vpmovm2b xmm0, k0
64 ; CHECK-NEXT: vzeroupper
66 %2 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 0, i64 %0)
; Lowers @llvm.get.active.lane.mask.v32i1.i64 with base 0.
; Four 8-lane i64 compares; the four 8-bit masks are merged pairwise with
; kunpckbw and then the two 16-bit halves with kunpckwd, giving a 32-bit
; mask that is expanded to bytes in ymm0 via vpmovm2b.
70 define <32 x i1> @create_mask32(i64 %0) {
71 ; CHECK-LABEL: create_mask32:
73 ; CHECK-NEXT: vpbroadcastq zmm0, rdi
74 ; CHECK-NEXT: vpcmpnleuq k0, zmm0, zmmword ptr [rip + {{\.?LCPI[0-9]+_[0-9]+}}]
75 ; CHECK-NEXT: vpcmpnleuq k1, zmm0, zmmword ptr [rip + {{\.?LCPI[0-9]+_[0-9]+}}]
76 ; CHECK-NEXT: vpcmpnleuq k2, zmm0, zmmword ptr [rip + {{\.?LCPI[0-9]+_[0-9]+}}]
77 ; CHECK-NEXT: kunpckbw k0, k1, k0
78 ; CHECK-NEXT: vpcmpnleuq k1, zmm0, zmmword ptr [rip + {{\.?LCPI[0-9]+_[0-9]+}}]
79 ; CHECK-NEXT: kunpckbw k1, k1, k2
80 ; CHECK-NEXT: kunpckwd k0, k1, k0
81 ; CHECK-NEXT: vpmovm2b ymm0, k0
83 %2 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 0, i64 %0)
; Lowers @llvm.get.active.lane.mask.v64i1.i64 with base 0.
; Worst case for i64 indices: eight 8-lane vpcmpnleuq compares, merged in a
; tree of kunpckbw (8->16), kunpckwd (16->32) and kunpckdq (32->64), then the
; full 64-bit mask is expanded to bytes in zmm0 via vpmovm2b.
87 define <64 x i1> @create_mask64(i64 %0) {
88 ; CHECK-LABEL: create_mask64:
90 ; CHECK-NEXT: vpbroadcastq zmm0, rdi
91 ; CHECK-NEXT: vpcmpnleuq k0, zmm0, zmmword ptr [rip + {{\.?LCPI[0-9]+_[0-9]+}}]
92 ; CHECK-NEXT: vpcmpnleuq k1, zmm0, zmmword ptr [rip + {{\.?LCPI[0-9]+_[0-9]+}}]
93 ; CHECK-NEXT: vpcmpnleuq k2, zmm0, zmmword ptr [rip + {{\.?LCPI[0-9]+_[0-9]+}}]
94 ; CHECK-NEXT: kunpckbw k0, k1, k0
95 ; CHECK-NEXT: vpcmpnleuq k1, zmm0, zmmword ptr [rip + {{\.?LCPI[0-9]+_[0-9]+}}]
96 ; CHECK-NEXT: kunpckbw k1, k1, k2
97 ; CHECK-NEXT: vpcmpnleuq k2, zmm0, zmmword ptr [rip + {{\.?LCPI[0-9]+_[0-9]+}}]
98 ; CHECK-NEXT: kunpckwd k0, k1, k0
99 ; CHECK-NEXT: vpcmpnleuq k1, zmm0, zmmword ptr [rip + {{\.?LCPI[0-9]+_[0-9]+}}]
100 ; CHECK-NEXT: kunpckbw k1, k1, k2
101 ; CHECK-NEXT: vpcmpnleuq k2, zmm0, zmmword ptr [rip + {{\.?LCPI[0-9]+_[0-9]+}}]
102 ; CHECK-NEXT: vpcmpnleuq k3, zmm0, zmmword ptr [rip + {{\.?LCPI[0-9]+_[0-9]+}}]
103 ; CHECK-NEXT: kunpckbw k2, k3, k2
104 ; CHECK-NEXT: kunpckwd k1, k2, k1
105 ; CHECK-NEXT: kunpckdq k0, k1, k0
106 ; CHECK-NEXT: vpmovm2b zmm0, k0
108 %2 = call <64 x i1> @llvm.get.active.lane.mask.v64i1.i64(i64 0, i64 %0)
; Lowers @llvm.get.active.lane.mask.v16i1.i32 with base 0.
; With i32 lane indices all 16 fit in one zmm, so a single vpcmpnleud
; against one constant-pool vector produces the whole mask -- contrast with
; create_mask16 above, which needs two i64 compares plus a kunpckbw.
112 define <16 x i1> @create_mask16_i32(i32 %0) {
113 ; CHECK-LABEL: create_mask16_i32:
115 ; CHECK-NEXT: vpbroadcastd zmm0, edi
116 ; CHECK-NEXT: vpcmpnleud k0, zmm0, zmmword ptr [rip + {{\.?LCPI[0-9]+_[0-9]+}}]
117 ; CHECK-NEXT: vpmovm2b xmm0, k0
118 ; CHECK-NEXT: vzeroupper
120 %2 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 %0)
; Lowers @llvm.get.active.lane.mask.v64i1.i32 with base 0.
; i32 indices halve the compare count versus create_mask64: four 16-lane
; vpcmpnleud compares merged with kunpckwd (16->32) and kunpckdq (32->64),
; then expanded to bytes in zmm0 via vpmovm2b.
124 define <64 x i1> @create_mask64_i32(i32 %0) {
125 ; CHECK-LABEL: create_mask64_i32:
127 ; CHECK-NEXT: vpbroadcastd zmm0, edi
128 ; CHECK-NEXT: vpcmpnleud k0, zmm0, zmmword ptr [rip + {{\.?LCPI[0-9]+_[0-9]+}}]
129 ; CHECK-NEXT: vpcmpnleud k1, zmm0, zmmword ptr [rip + {{\.?LCPI[0-9]+_[0-9]+}}]
130 ; CHECK-NEXT: vpcmpnleud k2, zmm0, zmmword ptr [rip + {{\.?LCPI[0-9]+_[0-9]+}}]
131 ; CHECK-NEXT: kunpckwd k0, k1, k0
132 ; CHECK-NEXT: vpcmpnleud k1, zmm0, zmmword ptr [rip + {{\.?LCPI[0-9]+_[0-9]+}}]
133 ; CHECK-NEXT: kunpckwd k1, k1, k2
134 ; CHECK-NEXT: kunpckdq k0, k1, k0
135 ; CHECK-NEXT: vpmovm2b zmm0, k0
137 %2 = call <64 x i1> @llvm.get.active.lane.mask.v64i1.i32(i32 0, i32 %0)