1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
3 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL
; select_sub: loads the <8 x i64> vector at index 1 from %ptr, tests bit 51
; (2251799813685248 == 1 << 51) in every lane, and yields %src in lanes where
; the bit is set and %a - %b in the remaining lanes.
; The CHECK lines expect the compare to become a VPTESTNMQ against a broadcast
; constant-pool entry writing %k1, and the select to fold into a single
; merge-masked VPSUBQ into %zmm0 (no separate blend instruction).
5 define dso_local <8 x i64> @select_sub(<8 x i64> %src, <8 x i64> %a, <8 x i64> %b, ptr %ptr) {
6 ; AVX512-LABEL: select_sub:
7 ; AVX512: # %bb.0: # %entry
8 ; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm3
9 ; AVX512-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm3, %k1
10 ; AVX512-NEXT: vpsubq %zmm2, %zmm1, %zmm0 {%k1}
13 %arrayidx = getelementptr inbounds <8 x i64>, ptr %ptr, i64 1
14 %0 = load <8 x i64>, ptr %arrayidx, align 64
15 %and1 = and <8 x i64> %0, <i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248>
16 %not = icmp ne <8 x i64> %and1, zeroinitializer
17 %sub = sub <8 x i64> %a, %b
18 %1 = select <8 x i1> %not, <8 x i64> %src, <8 x i64> %sub
; select_add: loads the <8 x i64> vector at index 1 from %ptr, tests bit 51
; (2251799813685248 == 1 << 51) in every lane, and yields %src in lanes where
; the bit is set and %a + %b in the remaining lanes.
; The CHECK lines expect a VPTESTNMQ writing %k1 followed by a single
; merge-masked VPADDQ into %zmm0 (the select is folded into the add).
22 define dso_local <8 x i64> @select_add(<8 x i64> %src, <8 x i64> %a, <8 x i64> %b, ptr %ptr) {
23 ; AVX512-LABEL: select_add:
24 ; AVX512: # %bb.0: # %entry
25 ; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm3
26 ; AVX512-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm3, %k1
27 ; AVX512-NEXT: vpaddq %zmm2, %zmm1, %zmm0 {%k1}
30 %arrayidx = getelementptr inbounds <8 x i64>, ptr %ptr, i64 1
31 %0 = load <8 x i64>, ptr %arrayidx, align 64
32 %and1 = and <8 x i64> %0, <i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248>
33 %not = icmp ne <8 x i64> %and1, zeroinitializer
34 %add = add <8 x i64> %a, %b
35 %1 = select <8 x i1> %not, <8 x i64> %src, <8 x i64> %add
; select_and: loads the <8 x i64> vector at index 1 from %ptr, tests bit 51
; (2251799813685248 == 1 << 51) in every lane, and yields %src in lanes where
; the bit is set and %a & %b in the remaining lanes.
; The CHECK lines expect a VPTESTNMQ writing %k1 followed by a single
; merge-masked VPANDQ into %zmm0 (the select is folded into the and).
39 define dso_local <8 x i64> @select_and(<8 x i64> %src, <8 x i64> %a, <8 x i64> %b, ptr %ptr) {
40 ; AVX512-LABEL: select_and:
41 ; AVX512: # %bb.0: # %entry
42 ; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm3
43 ; AVX512-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm3, %k1
44 ; AVX512-NEXT: vpandq %zmm2, %zmm1, %zmm0 {%k1}
47 %arrayidx = getelementptr inbounds <8 x i64>, ptr %ptr, i64 1
48 %0 = load <8 x i64>, ptr %arrayidx, align 64
49 %and1 = and <8 x i64> %0, <i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248>
50 %not = icmp ne <8 x i64> %and1, zeroinitializer
51 %and = and <8 x i64> %a, %b
52 %1 = select <8 x i1> %not, <8 x i64> %src, <8 x i64> %and
; select_xor: loads the <8 x i64> vector at index 1 from %ptr, tests bit 51
; (2251799813685248 == 1 << 51) in every lane, and yields %src in lanes where
; the bit is set and %a ^ %b in the remaining lanes.
; The CHECK lines expect a VPTESTNMQ writing %k1 followed by a single
; merge-masked VPXORQ into %zmm0 (the select is folded into the xor).
56 define dso_local <8 x i64> @select_xor(<8 x i64> %src, <8 x i64> %a, <8 x i64> %b, ptr %ptr) {
57 ; AVX512-LABEL: select_xor:
58 ; AVX512: # %bb.0: # %entry
59 ; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm3
60 ; AVX512-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm3, %k1
61 ; AVX512-NEXT: vpxorq %zmm2, %zmm1, %zmm0 {%k1}
64 %arrayidx = getelementptr inbounds <8 x i64>, ptr %ptr, i64 1
65 %0 = load <8 x i64>, ptr %arrayidx, align 64
66 %and1 = and <8 x i64> %0, <i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248>
67 %not = icmp ne <8 x i64> %and1, zeroinitializer
68 %xor = xor <8 x i64> %a, %b
69 %1 = select <8 x i1> %not, <8 x i64> %src, <8 x i64> %xor
; select_shl: loads the <8 x i64> vector at index 1 from %ptr, tests bit 51
; (2251799813685248 == 1 << 51) in every lane, and yields %src in lanes where
; the bit is set and the per-lane left shift %a << %b in the remaining lanes.
; The CHECK lines expect a VPTESTNMQ writing %k1 followed by a single
; merge-masked variable shift VPSLLVQ into %zmm0.
73 define dso_local <8 x i64> @select_shl(<8 x i64> %src, <8 x i64> %a, <8 x i64> %b, ptr %ptr) {
74 ; AVX512-LABEL: select_shl:
75 ; AVX512: # %bb.0: # %entry
76 ; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm3
77 ; AVX512-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm3, %k1
78 ; AVX512-NEXT: vpsllvq %zmm2, %zmm1, %zmm0 {%k1}
81 %arrayidx = getelementptr inbounds <8 x i64>, ptr %ptr, i64 1
82 %0 = load <8 x i64>, ptr %arrayidx, align 64
83 %and1 = and <8 x i64> %0, <i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248>
84 %not = icmp ne <8 x i64> %and1, zeroinitializer
85 %shl = shl <8 x i64> %a, %b
86 %1 = select <8 x i1> %not, <8 x i64> %src, <8 x i64> %shl
; select_srl: loads the <8 x i64> vector at index 1 from %ptr, tests bit 51
; (2251799813685248 == 1 << 51) in every lane, and yields %src in lanes where
; the bit is set and the per-lane logical right shift (lshr) of %a by %b in
; the remaining lanes.
; The CHECK lines expect a VPTESTNMQ writing %k1 followed by a single
; merge-masked variable shift VPSRLVQ into %zmm0.
90 define dso_local <8 x i64> @select_srl(<8 x i64> %src, <8 x i64> %a, <8 x i64> %b, ptr %ptr) {
91 ; AVX512-LABEL: select_srl:
92 ; AVX512: # %bb.0: # %entry
93 ; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm3
94 ; AVX512-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm3, %k1
95 ; AVX512-NEXT: vpsrlvq %zmm2, %zmm1, %zmm0 {%k1}
98 %arrayidx = getelementptr inbounds <8 x i64>, ptr %ptr, i64 1
99 %0 = load <8 x i64>, ptr %arrayidx, align 64
100 %and1 = and <8 x i64> %0, <i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248>
101 %not = icmp ne <8 x i64> %and1, zeroinitializer
102 %srl = lshr <8 x i64> %a, %b
103 %1 = select <8 x i1> %not, <8 x i64> %src, <8 x i64> %srl
; select_sra: loads the <8 x i64> vector at index 1 from %ptr, tests bit 51
; (2251799813685248 == 1 << 51) in every lane, and yields %src in lanes where
; the bit is set and the per-lane arithmetic right shift (ashr) of %a by %b in
; the remaining lanes.
; The CHECK lines expect a VPTESTNMQ writing %k1 followed by a single
; merge-masked variable shift VPSRAVQ into %zmm0.
107 define dso_local <8 x i64> @select_sra(<8 x i64> %src, <8 x i64> %a, <8 x i64> %b, ptr %ptr) {
108 ; AVX512-LABEL: select_sra:
109 ; AVX512: # %bb.0: # %entry
110 ; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm3
111 ; AVX512-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm3, %k1
112 ; AVX512-NEXT: vpsravq %zmm2, %zmm1, %zmm0 {%k1}
115 %arrayidx = getelementptr inbounds <8 x i64>, ptr %ptr, i64 1
116 %0 = load <8 x i64>, ptr %arrayidx, align 64
117 %and1 = and <8 x i64> %0, <i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248, i64 2251799813685248>
118 %not = icmp ne <8 x i64> %and1, zeroinitializer
119 %sra = ashr <8 x i64> %a, %b
120 %1 = select <8 x i1> %not, <8 x i64> %src, <8 x i64> %sra
; select_mul: 256-bit (<8 x i32>) variant. Loads the <8 x i32> vector at index 1
; from %ptr, ANDs every lane with 22517, and yields %src in lanes where the
; result is non-zero and %a * %b in the remaining lanes.
; Expected codegen differs per RUN line:
;   AVX512F  - the mask test is done on the widened %zmm register ({1to16}
;              broadcast), VPMULLD runs unmasked on %ymm, and a masked
;              VMOVDQA32 on %zmm performs the blend.
;   AVX512VL - the mask test stays on %ymm ({1to8} broadcast) and the select is
;              folded into a single merge-masked VPMULLD on %ymm.
124 define dso_local <8 x i32> @select_mul(<8 x i32> %src, <8 x i32> %a, <8 x i32> %b, ptr %ptr) {
125 ; AVX512F-LABEL: select_mul:
126 ; AVX512F: # %bb.0: # %entry
127 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
128 ; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm3
129 ; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm3, %k1
130 ; AVX512F-NEXT: vpmulld %ymm2, %ymm1, %ymm1
131 ; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
132 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
135 ; AVX512VL-LABEL: select_mul:
136 ; AVX512VL: # %bb.0: # %entry
137 ; AVX512VL-NEXT: vmovdqa 32(%rdi), %ymm3
138 ; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %k1
139 ; AVX512VL-NEXT: vpmulld %ymm2, %ymm1, %ymm0 {%k1}
140 ; AVX512VL-NEXT: retq
142 %arrayidx = getelementptr inbounds <8 x i32>, ptr %ptr, i32 1
143 %0 = load <8 x i32>, ptr %arrayidx, align 64
144 %and1 = and <8 x i32> %0, <i32 22517, i32 22517, i32 22517, i32 22517, i32 22517, i32 22517, i32 22517, i32 22517>
145 %not = icmp ne <8 x i32> %and1, zeroinitializer
146 %mul = mul <8 x i32> %a, %b
147 %1 = select <8 x i1> %not, <8 x i32> %src, <8 x i32> %mul
; select_smax: 256-bit (<8 x i32>) variant. Loads the <8 x i32> vector at index 1
; from %ptr, ANDs every lane with 22517, and yields %src in lanes where the
; result is non-zero and the signed maximum of %a and %b in the remaining lanes.
; Expected codegen differs per RUN line:
;   AVX512F  - mask test on the widened %zmm register ({1to16} broadcast),
;              unmasked VPMAXSD on %ymm, then a masked VMOVDQA32 blend on %zmm.
;   AVX512VL - mask test on %ymm ({1to8} broadcast) and a single merge-masked
;              VPMAXSD on %ymm.
; The intrinsic's overload suffix is v8i32 so that it matches the <8 x i32>
; operand and result type used by the call and the declaration.
151 define dso_local <8 x i32> @select_smax(<8 x i32> %src, <8 x i32> %a, <8 x i32> %b, ptr %ptr) {
152 ; AVX512F-LABEL: select_smax:
153 ; AVX512F: # %bb.0: # %entry
154 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
155 ; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm3
156 ; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm3, %k1
157 ; AVX512F-NEXT: vpmaxsd %ymm2, %ymm1, %ymm1
158 ; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
159 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
162 ; AVX512VL-LABEL: select_smax:
163 ; AVX512VL: # %bb.0: # %entry
164 ; AVX512VL-NEXT: vmovdqa 32(%rdi), %ymm3
165 ; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %k1
166 ; AVX512VL-NEXT: vpmaxsd %ymm2, %ymm1, %ymm0 {%k1}
167 ; AVX512VL-NEXT: retq
169 %arrayidx = getelementptr inbounds <8 x i32>, ptr %ptr, i32 1
170 %0 = load <8 x i32>, ptr %arrayidx, align 64
171 %and1 = and <8 x i32> %0, <i32 22517, i32 22517, i32 22517, i32 22517, i32 22517, i32 22517, i32 22517, i32 22517>
172 %not = icmp ne <8 x i32> %and1, zeroinitializer
173 %smax = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %a, <8 x i32> %b)
174 %1 = select <8 x i1> %not, <8 x i32> %src, <8 x i32> %smax
177 declare <8 x i32> @llvm.smax.v8i32(<8 x i32> %a, <8 x i32> %b)
; select_smin: 256-bit (<8 x i32>) variant. Loads the <8 x i32> vector at index 1
; from %ptr, ANDs every lane with 22517, and yields %src in lanes where the
; result is non-zero and the signed minimum of %a and %b in the remaining lanes.
; Expected codegen differs per RUN line:
;   AVX512F  - mask test on the widened %zmm register ({1to16} broadcast),
;              unmasked VPMINSD on %ymm, then a masked VMOVDQA32 blend on %zmm.
;   AVX512VL - mask test on %ymm ({1to8} broadcast) and a single merge-masked
;              VPMINSD on %ymm.
; The intrinsic's overload suffix is v8i32 so that it matches the <8 x i32>
; operand and result type used by the call and the declaration.
179 define dso_local <8 x i32> @select_smin(<8 x i32> %src, <8 x i32> %a, <8 x i32> %b, ptr %ptr) {
180 ; AVX512F-LABEL: select_smin:
181 ; AVX512F: # %bb.0: # %entry
182 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
183 ; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm3
184 ; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm3, %k1
185 ; AVX512F-NEXT: vpminsd %ymm2, %ymm1, %ymm1
186 ; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
187 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
190 ; AVX512VL-LABEL: select_smin:
191 ; AVX512VL: # %bb.0: # %entry
192 ; AVX512VL-NEXT: vmovdqa 32(%rdi), %ymm3
193 ; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %k1
194 ; AVX512VL-NEXT: vpminsd %ymm2, %ymm1, %ymm0 {%k1}
195 ; AVX512VL-NEXT: retq
197 %arrayidx = getelementptr inbounds <8 x i32>, ptr %ptr, i32 1
198 %0 = load <8 x i32>, ptr %arrayidx, align 64
199 %and1 = and <8 x i32> %0, <i32 22517, i32 22517, i32 22517, i32 22517, i32 22517, i32 22517, i32 22517, i32 22517>
200 %not = icmp ne <8 x i32> %and1, zeroinitializer
201 %smin = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %a, <8 x i32> %b)
202 %1 = select <8 x i1> %not, <8 x i32> %src, <8 x i32> %smin
205 declare <8 x i32> @llvm.smin.v8i32(<8 x i32> %a, <8 x i32> %b)
; select_umax: 256-bit (<8 x i32>) variant. Loads the <8 x i32> vector at index 1
; from %ptr, ANDs every lane with 22517, and yields %src in lanes where the
; result is non-zero and the unsigned maximum of %a and %b in the remaining
; lanes.
; Expected codegen differs per RUN line:
;   AVX512F  - mask test on the widened %zmm register ({1to16} broadcast),
;              unmasked VPMAXUD on %ymm, then a masked VMOVDQA32 blend on %zmm.
;   AVX512VL - mask test on %ymm ({1to8} broadcast) and a single merge-masked
;              VPMAXUD on %ymm.
; The intrinsic's overload suffix is v8i32 so that it matches the <8 x i32>
; operand and result type used by the call and the declaration.
207 define dso_local <8 x i32> @select_umax(<8 x i32> %src, <8 x i32> %a, <8 x i32> %b, ptr %ptr) {
208 ; AVX512F-LABEL: select_umax:
209 ; AVX512F: # %bb.0: # %entry
210 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
211 ; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm3
212 ; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm3, %k1
213 ; AVX512F-NEXT: vpmaxud %ymm2, %ymm1, %ymm1
214 ; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
215 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
218 ; AVX512VL-LABEL: select_umax:
219 ; AVX512VL: # %bb.0: # %entry
220 ; AVX512VL-NEXT: vmovdqa 32(%rdi), %ymm3
221 ; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %k1
222 ; AVX512VL-NEXT: vpmaxud %ymm2, %ymm1, %ymm0 {%k1}
223 ; AVX512VL-NEXT: retq
225 %arrayidx = getelementptr inbounds <8 x i32>, ptr %ptr, i32 1
226 %0 = load <8 x i32>, ptr %arrayidx, align 64
227 %and1 = and <8 x i32> %0, <i32 22517, i32 22517, i32 22517, i32 22517, i32 22517, i32 22517, i32 22517, i32 22517>
228 %not = icmp ne <8 x i32> %and1, zeroinitializer
229 %umax = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %a, <8 x i32> %b)
230 %1 = select <8 x i1> %not, <8 x i32> %src, <8 x i32> %umax
233 declare <8 x i32> @llvm.umax.v8i32(<8 x i32> %a, <8 x i32> %b)
; select_umin: 256-bit (<8 x i32>) variant. Loads the <8 x i32> vector at index 1
; from %ptr, ANDs every lane with 22517, and yields %src in lanes where the
; result is non-zero and the unsigned minimum of %a and %b in the remaining
; lanes.
; Expected codegen differs per RUN line:
;   AVX512F  - mask test on the widened %zmm register ({1to16} broadcast),
;              unmasked VPMINUD on %ymm, then a masked VMOVDQA32 blend on %zmm.
;   AVX512VL - mask test on %ymm ({1to8} broadcast) and a single merge-masked
;              VPMINUD on %ymm.
; The intrinsic's overload suffix is v8i32 so that it matches the <8 x i32>
; operand and result type used by the call and the declaration.
235 define dso_local <8 x i32> @select_umin(<8 x i32> %src, <8 x i32> %a, <8 x i32> %b, ptr %ptr) {
236 ; AVX512F-LABEL: select_umin:
237 ; AVX512F: # %bb.0: # %entry
238 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
239 ; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm3
240 ; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm3, %k1
241 ; AVX512F-NEXT: vpminud %ymm2, %ymm1, %ymm1
242 ; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
243 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
246 ; AVX512VL-LABEL: select_umin:
247 ; AVX512VL: # %bb.0: # %entry
248 ; AVX512VL-NEXT: vmovdqa 32(%rdi), %ymm3
249 ; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %k1
250 ; AVX512VL-NEXT: vpminud %ymm2, %ymm1, %ymm0 {%k1}
251 ; AVX512VL-NEXT: retq
253 %arrayidx = getelementptr inbounds <8 x i32>, ptr %ptr, i32 1
254 %0 = load <8 x i32>, ptr %arrayidx, align 64
255 %and1 = and <8 x i32> %0, <i32 22517, i32 22517, i32 22517, i32 22517, i32 22517, i32 22517, i32 22517, i32 22517>
256 %not = icmp ne <8 x i32> %and1, zeroinitializer
257 %umin = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %a, <8 x i32> %b)
258 %1 = select <8 x i1> %not, <8 x i32> %src, <8 x i32> %umin
261 declare <8 x i32> @llvm.umin.v8i32(<8 x i32> %a, <8 x i32> %b)