1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vpopcntdq,+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK
; combine_mask_with_or: bitwise OR of two shuffles of %v0, then a lane select
; against ctpop(%v0).
;   %shuf0_0 - %v0 with each adjacent pair of elements swapped.
;   %shuf0_1 - even elements from %v0, odd elements from %shuf0_0 (mask indices
;              >= 16 address the second operand), i.e. every even element of
;              %v0 duplicated into the following odd position.
;   %r       - elements 0-3 and 8-11 from the OR result, elements 4-7 and 12-15
;              from the popcount result.
; The expected assembly performs that final select as a merge-masked vpopcntd
; (k1 = 0xF0F0, bits 4-7 and 12-15) written over the register holding the OR.
5 define <16 x i32> @combine_mask_with_or(<16 x i32> %v0) {
6 ; CHECK-LABEL: combine_mask_with_or:
8 ; CHECK-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
9 ; CHECK-NEXT: movw $2570, %ax # imm = 0xA0A
10 ; CHECK-NEXT: kmovw %eax, %k1
11 ; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
12 ; CHECK-NEXT: vpord %zmm2, %zmm1, %zmm1
13 ; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
14 ; CHECK-NEXT: kmovw %eax, %k1
15 ; CHECK-NEXT: vpopcntd %zmm0, %zmm1 {%k1}
16 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
18 %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
19 %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
20 %op0_0 = or <16 x i32> %shuf0_0, %shuf0_1
21 %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
22 %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
; combine_mask_with_mul: integer multiply of two shuffles of %v0, then a lane
; select against ctpop(%v0).
;   %shuf0_0 - %v0 with each adjacent pair of elements swapped.
;   %shuf0_1 - even elements from %v0, odd elements from %shuf0_0.
;   %r       - elements 0-3 and 8-11 from the product, elements 4-7 and 12-15
;              from the popcount result.
; The expected assembly writes the popcount over the vpmulld result with a
; merge-masked vpopcntd (k1 = 0xF0F0).
26 define <16 x i32> @combine_mask_with_mul(<16 x i32> %v0) {
27 ; CHECK-LABEL: combine_mask_with_mul:
29 ; CHECK-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
30 ; CHECK-NEXT: movw $2570, %ax # imm = 0xA0A
31 ; CHECK-NEXT: kmovw %eax, %k1
32 ; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
33 ; CHECK-NEXT: vpmulld %zmm2, %zmm1, %zmm1
34 ; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
35 ; CHECK-NEXT: kmovw %eax, %k1
36 ; CHECK-NEXT: vpopcntd %zmm0, %zmm1 {%k1}
37 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
39 %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
40 %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
41 %op0_0 = mul <16 x i32> %shuf0_0, %shuf0_1
42 %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
43 %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
; combine_mask_with_abs: unary variant. llvm.abs is applied to each shuffle of
; %v0 separately, the abs results are recombined with a shuffle, and the
; outcome is lane-selected against ctpop(%v0).
;   %shuf0_0 - %v0 with each adjacent pair of elements swapped.
;   %shuf0_1 - even elements from %v0, odd elements from %shuf0_0.
;   %r       - elements 0-3 and 8-11 from %op0_0, elements 4-7 and 12-15 from
;              the popcount result.
; The expected assembly contains a single vpabsd of the pair-swapped vector and
; no blend before it, followed by a merge-masked vpopcntd (k1 = 0xF0F0).
47 define <16 x i32> @combine_mask_with_abs(<16 x i32> %v0) {
48 ; CHECK-LABEL: combine_mask_with_abs:
50 ; CHECK-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
51 ; CHECK-NEXT: vpabsd %zmm1, %zmm1
52 ; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
53 ; CHECK-NEXT: kmovw %eax, %k1
54 ; CHECK-NEXT: vpopcntd %zmm0, %zmm1 {%k1}
55 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
57 %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
58 %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
59 %op0_0_tmp0 = tail call <16 x i32> @llvm.abs.v16i32(<16 x i32> %shuf0_0, i1 true)
60 %op0_0_tmp1 = tail call <16 x i32> @llvm.abs.v16i32(<16 x i32> %shuf0_1, i1 true)
; NOTE(review): both operands of the shuffle below are %op0_0_tmp0, so
; %op0_0_tmp1 is unused and %op0_0 equals %op0_0_tmp0 element for element.
; Possibly %op0_0_tmp1 was intended as the second operand - confirm. Changing
; it requires regenerating the assertions with update_llc_test_checks.py.
61 %op0_0 = shufflevector <16 x i32> %op0_0_tmp0, <16 x i32> %op0_0_tmp0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
62 %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
63 %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
; combine_mask_with_umin: unsigned minimum (llvm.umin) of two shuffles of %v0,
; then a lane select against ctpop(%v0).
;   %shuf0_0 - %v0 with each adjacent pair of elements swapped.
;   %shuf0_1 - even elements from %v0, odd elements from %shuf0_0.
;   %r       - elements 0-3 and 8-11 from the umin result, elements 4-7 and
;              12-15 from the popcount result.
; The expected assembly blends with k1 = 0xAAAA (all odd lanes) before
; vpminud, then writes the popcount with a merge-masked vpopcntd (k1 = 0xF0F0).
67 define <16 x i32> @combine_mask_with_umin(<16 x i32> %v0) {
68 ; CHECK-LABEL: combine_mask_with_umin:
70 ; CHECK-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
71 ; CHECK-NEXT: movw $-21846, %ax # imm = 0xAAAA
72 ; CHECK-NEXT: kmovw %eax, %k1
73 ; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
74 ; CHECK-NEXT: vpminud %zmm2, %zmm1, %zmm1
75 ; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
76 ; CHECK-NEXT: kmovw %eax, %k1
77 ; CHECK-NEXT: vpopcntd %zmm0, %zmm1 {%k1}
78 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
80 %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
81 %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
82 %op0_0 = tail call <16 x i32> @llvm.umin.v16i32(<16 x i32> %shuf0_0, <16 x i32> %shuf0_1)
83 %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
84 %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
; combine_mask_with_umax: unsigned maximum (llvm.umax) of two shuffles of %v0,
; then a lane select against ctpop(%v0).
;   %shuf0_0 - %v0 with each adjacent pair of elements swapped.
;   %shuf0_1 - even elements from %v0, odd elements from %shuf0_0.
;   %r       - elements 0-3 and 8-11 from the umax result, elements 4-7 and
;              12-15 from the popcount result.
; The expected assembly blends with k1 = 0xAAAA (all odd lanes) before
; vpmaxud, then writes the popcount with a merge-masked vpopcntd (k1 = 0xF0F0).
88 define <16 x i32> @combine_mask_with_umax(<16 x i32> %v0) {
89 ; CHECK-LABEL: combine_mask_with_umax:
91 ; CHECK-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
92 ; CHECK-NEXT: movw $-21846, %ax # imm = 0xAAAA
93 ; CHECK-NEXT: kmovw %eax, %k1
94 ; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
95 ; CHECK-NEXT: vpmaxud %zmm2, %zmm1, %zmm1
96 ; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
97 ; CHECK-NEXT: kmovw %eax, %k1
98 ; CHECK-NEXT: vpopcntd %zmm0, %zmm1 {%k1}
99 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
101 %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
102 %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
103 %op0_0 = tail call <16 x i32> @llvm.umax.v16i32(<16 x i32> %shuf0_0, <16 x i32> %shuf0_1)
104 %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
105 %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
; combine_mask_with_smin: signed minimum (llvm.smin) of two shuffles of %v0,
; then a lane select against ctpop(%v0).
;   %shuf0_0 - %v0 with each adjacent pair of elements swapped.
;   %shuf0_1 - even elements from %v0, odd elements from %shuf0_0.
;   %r       - elements 0-3 and 8-11 from the smin result, elements 4-7 and
;              12-15 from the popcount result.
; The expected assembly blends with k1 = 0xAAAA (all odd lanes) before
; vpminsd, then writes the popcount with a merge-masked vpopcntd (k1 = 0xF0F0).
109 define <16 x i32> @combine_mask_with_smin(<16 x i32> %v0) {
110 ; CHECK-LABEL: combine_mask_with_smin:
112 ; CHECK-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
113 ; CHECK-NEXT: movw $-21846, %ax # imm = 0xAAAA
114 ; CHECK-NEXT: kmovw %eax, %k1
115 ; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
116 ; CHECK-NEXT: vpminsd %zmm2, %zmm1, %zmm1
117 ; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
118 ; CHECK-NEXT: kmovw %eax, %k1
119 ; CHECK-NEXT: vpopcntd %zmm0, %zmm1 {%k1}
120 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
122 %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
123 %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
124 %op0_0 = tail call <16 x i32> @llvm.smin.v16i32(<16 x i32> %shuf0_0, <16 x i32> %shuf0_1)
125 %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
126 %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
; combine_mask_with_smax: signed maximum (llvm.smax) of two shuffles of %v0,
; then a lane select against ctpop(%v0).
;   %shuf0_0 - %v0 with each adjacent pair of elements swapped.
;   %shuf0_1 - even elements from %v0, odd elements from %shuf0_0.
;   %r       - elements 0-3 and 8-11 from the smax result, elements 4-7 and
;              12-15 from the popcount result.
; The expected assembly blends with k1 = 0xAAAA (all odd lanes) before
; vpmaxsd, then writes the popcount with a merge-masked vpopcntd (k1 = 0xF0F0).
130 define <16 x i32> @combine_mask_with_smax(<16 x i32> %v0) {
131 ; CHECK-LABEL: combine_mask_with_smax:
133 ; CHECK-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
134 ; CHECK-NEXT: movw $-21846, %ax # imm = 0xAAAA
135 ; CHECK-NEXT: kmovw %eax, %k1
136 ; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
137 ; CHECK-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
138 ; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
139 ; CHECK-NEXT: kmovw %eax, %k1
140 ; CHECK-NEXT: vpopcntd %zmm0, %zmm1 {%k1}
141 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
143 %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
144 %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
145 %op0_0 = tail call <16 x i32> @llvm.smax.v16i32(<16 x i32> %shuf0_0, <16 x i32> %shuf0_1)
146 %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
147 %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
; combine_mask_with_shl: per-element left shift where both the shifted value
; and the shift amount are shuffles of %v0, then a lane select against
; ctpop(%v0).
;   %shuf0_0 - %v0 with each adjacent pair of elements swapped (value shifted).
;   %shuf0_1 - even elements from %v0, odd elements from %shuf0_0 (shift
;              amount).
;   %r       - elements 0-3 and 8-11 from the shift result, elements 4-7 and
;              12-15 from the popcount result.
; The expected assembly uses vpsllvd for the shift and writes the popcount over
; its result with a merge-masked vpopcntd (k1 = 0xF0F0).
151 define <16 x i32> @combine_mask_with_shl(<16 x i32> %v0) {
152 ; CHECK-LABEL: combine_mask_with_shl:
154 ; CHECK-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
155 ; CHECK-NEXT: movw $2570, %ax # imm = 0xA0A
156 ; CHECK-NEXT: kmovw %eax, %k1
157 ; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
158 ; CHECK-NEXT: vpsllvd %zmm2, %zmm1, %zmm1
159 ; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
160 ; CHECK-NEXT: kmovw %eax, %k1
161 ; CHECK-NEXT: vpopcntd %zmm0, %zmm1 {%k1}
162 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
164 %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
165 %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
166 %op0_0 = shl <16 x i32> %shuf0_0, %shuf0_1
167 %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
168 %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
; combine_mask_with_ashr: per-element arithmetic right shift where both the
; shifted value and the shift amount are shuffles of %v0, then a lane select
; against ctpop(%v0).
;   %shuf0_0 - %v0 with each adjacent pair of elements swapped (value shifted).
;   %shuf0_1 - even elements from %v0, odd elements from %shuf0_0 (shift
;              amount).
;   %r       - elements 0-3 and 8-11 from the shift result, elements 4-7 and
;              12-15 from the popcount result.
; The expected assembly uses vpsravd for the shift and writes the popcount over
; its result with a merge-masked vpopcntd (k1 = 0xF0F0).
172 define <16 x i32> @combine_mask_with_ashr(<16 x i32> %v0) {
173 ; CHECK-LABEL: combine_mask_with_ashr:
175 ; CHECK-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
176 ; CHECK-NEXT: movw $2570, %ax # imm = 0xA0A
177 ; CHECK-NEXT: kmovw %eax, %k1
178 ; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
179 ; CHECK-NEXT: vpsravd %zmm2, %zmm1, %zmm1
180 ; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
181 ; CHECK-NEXT: kmovw %eax, %k1
182 ; CHECK-NEXT: vpopcntd %zmm0, %zmm1 {%k1}
183 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
185 %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
186 %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
187 %op0_0 = ashr <16 x i32> %shuf0_0, %shuf0_1
188 %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
189 %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
; combine_mask_with_lshr: per-element logical right shift where both the
; shifted value and the shift amount are shuffles of %v0, then a lane select
; against ctpop(%v0).
;   %shuf0_0 - %v0 with each adjacent pair of elements swapped (value shifted).
;   %shuf0_1 - even elements from %v0, odd elements from %shuf0_0 (shift
;              amount).
;   %r       - elements 0-3 and 8-11 from the shift result, elements 4-7 and
;              12-15 from the popcount result.
; The expected assembly uses vpsrlvd for the shift and writes the popcount over
; its result with a merge-masked vpopcntd (k1 = 0xF0F0).
193 define <16 x i32> @combine_mask_with_lshr(<16 x i32> %v0) {
194 ; CHECK-LABEL: combine_mask_with_lshr:
196 ; CHECK-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
197 ; CHECK-NEXT: movw $2570, %ax # imm = 0xA0A
198 ; CHECK-NEXT: kmovw %eax, %k1
199 ; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
200 ; CHECK-NEXT: vpsrlvd %zmm2, %zmm1, %zmm1
201 ; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
202 ; CHECK-NEXT: kmovw %eax, %k1
203 ; CHECK-NEXT: vpopcntd %zmm0, %zmm1 {%k1}
204 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
206 %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
207 %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
208 %op0_0 = lshr <16 x i32> %shuf0_0, %shuf0_1
209 %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
210 %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
; Declarations of the <16 x i32> intrinsics called by the test functions:
; signed/unsigned min and max, population count, and absolute value (the
; latter takes an extra i1 flag argument).
214 declare <16 x i32> @llvm.smin.v16i32(<16 x i32>, <16 x i32>)
215 declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>)
216 declare <16 x i32> @llvm.umin.v16i32(<16 x i32>, <16 x i32>)
217 declare <16 x i32> @llvm.umax.v16i32(<16 x i32>, <16 x i32>)
218 declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>)
219 declare <16 x i32> @llvm.abs.v16i32(<16 x i32>, i1)