1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -passes=slp-vectorizer -mtriple=x86_64-- -S | FileCheck %s --check-prefixes=CHECK,SSE
3 ; RUN: opt < %s -passes=slp-vectorizer -mtriple=x86_64-- -mattr=avx512vl -S | FileCheck %s --check-prefixes=CHECK,AVX
; Logical-and chain (select a, b, false) over four scalar `icmp slt ..., 0`
; compares, one per lane of %x.
; Expected output: a single <4 x i32> compare, a freeze of the <4 x i1> result,
; and one llvm.vector.reduce.and.v4i1 call.
7 define i1 @logical_and_icmp(<4 x i32> %x) {
8 ; CHECK-LABEL: @logical_and_icmp(
9 ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], zeroinitializer
10 ; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
11 ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
12 ; CHECK-NEXT: ret i1 [[TMP3]]
14 %x0 = extractelement <4 x i32> %x, i32 0
15 %x1 = extractelement <4 x i32> %x, i32 1
16 %x2 = extractelement <4 x i32> %x, i32 2
17 %x3 = extractelement <4 x i32> %x, i32 3
18 %c0 = icmp slt i32 %x0, 0
19 %c1 = icmp slt i32 %x1, 0
20 %c2 = icmp slt i32 %x2, 0
21 %c3 = icmp slt i32 %x3, 0
22 %s1 = select i1 %c0, i1 %c1, i1 false
23 %s2 = select i1 %s1, i1 %c2, i1 false
24 %s3 = select i1 %s2, i1 %c3, i1 false
; Logical-or chain (select a, true, b) over four scalar `icmp slt` compares of
; matching lanes of %x and %y.
; Expected output: a single <4 x i32> compare of %x with %y, a freeze, and one
; llvm.vector.reduce.or.v4i1 call.
28 define i1 @logical_or_icmp(<4 x i32> %x, <4 x i32> %y) {
29 ; CHECK-LABEL: @logical_or_icmp(
30 ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]]
31 ; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
32 ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
33 ; CHECK-NEXT: ret i1 [[TMP3]]
35 %x0 = extractelement <4 x i32> %x, i32 0
36 %x1 = extractelement <4 x i32> %x, i32 1
37 %x2 = extractelement <4 x i32> %x, i32 2
38 %x3 = extractelement <4 x i32> %x, i32 3
39 %y0 = extractelement <4 x i32> %y, i32 0
40 %y1 = extractelement <4 x i32> %y, i32 1
41 %y2 = extractelement <4 x i32> %y, i32 2
42 %y3 = extractelement <4 x i32> %y, i32 3
43 %c0 = icmp slt i32 %x0, %y0
44 %c1 = icmp slt i32 %x1, %y1
45 %c2 = icmp slt i32 %x2, %y2
46 %c3 = icmp slt i32 %x3, %y3
47 %s1 = select i1 %c0, i1 true, i1 %c1
48 %s2 = select i1 %s1, i1 true, i1 %c2
49 %s3 = select i1 %s2, i1 true, i1 %c3
; Floating-point variant of the logical-and chain: four scalar
; `fcmp olt ..., 0.0` compares, one per lane of %x, joined by select a, b, false.
; Expected output: a single <4 x float> compare, a freeze, and one
; llvm.vector.reduce.and.v4i1 call.
53 define i1 @logical_and_fcmp(<4 x float> %x) {
54 ; CHECK-LABEL: @logical_and_fcmp(
55 ; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x float> [[X:%.*]], zeroinitializer
56 ; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
57 ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
58 ; CHECK-NEXT: ret i1 [[TMP3]]
60 %x0 = extractelement <4 x float> %x, i32 0
61 %x1 = extractelement <4 x float> %x, i32 1
62 %x2 = extractelement <4 x float> %x, i32 2
63 %x3 = extractelement <4 x float> %x, i32 3
64 %c0 = fcmp olt float %x0, 0.0
65 %c1 = fcmp olt float %x1, 0.0
66 %c2 = fcmp olt float %x2, 0.0
67 %c3 = fcmp olt float %x3, 0.0
68 %s1 = select i1 %c0, i1 %c1, i1 false
69 %s2 = select i1 %s1, i1 %c2, i1 false
70 %s3 = select i1 %s2, i1 %c3, i1 false
; Floating-point variant of the logical-or chain: four scalar
; `fcmp olt ..., 0.0` compares, one per lane of %x, joined by select a, true, b.
; Expected output: a single <4 x float> compare, a freeze, and one
; llvm.vector.reduce.or.v4i1 call.
74 define i1 @logical_or_fcmp(<4 x float> %x) {
75 ; CHECK-LABEL: @logical_or_fcmp(
76 ; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x float> [[X:%.*]], zeroinitializer
77 ; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
78 ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
79 ; CHECK-NEXT: ret i1 [[TMP3]]
81 %x0 = extractelement <4 x float> %x, i32 0
82 %x1 = extractelement <4 x float> %x, i32 1
83 %x2 = extractelement <4 x float> %x, i32 2
84 %x3 = extractelement <4 x float> %x, i32 3
85 %c0 = fcmp olt float %x0, 0.0
86 %c1 = fcmp olt float %x1, 0.0
87 %c2 = fcmp olt float %x2, 0.0
88 %c3 = fcmp olt float %x3, 0.0
89 %s1 = select i1 %c0, i1 true, i1 %c1
90 %s2 = select i1 %s1, i1 true, i1 %c2
91 %s3 = select i1 %s2, i1 true, i1 %c3
; Logical-and chain whose four compares against 0 use different predicates
; (ult, slt, sgt, slt on lanes 0..3 of %x).
; The two RUN configurations are expected to differ:
;  - SSE prefix: operands are rearranged with shufflevector, compared once with
;    slt and once with ult, the two results are blended, frozen and fed to
;    llvm.vector.reduce.and.v4i1.
;  - AVX prefix: the expected output mirrors the scalar input (no vector code).
95 define i1 @logical_and_icmp_diff_preds(<4 x i32> %x) {
96 ; SSE-LABEL: @logical_and_icmp_diff_preds(
97 ; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, <4 x i32> <i32 1, i32 3, i32 6, i32 0>
98 ; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
99 ; SSE-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[TMP1]], [[TMP2]]
100 ; SSE-NEXT: [[TMP4:%.*]] = icmp ult <4 x i32> [[TMP1]], [[TMP2]]
101 ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
102 ; SSE-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
103 ; SSE-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]])
104 ; SSE-NEXT: ret i1 [[TMP7]]
106 ; AVX-LABEL: @logical_and_icmp_diff_preds(
107 ; AVX-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
108 ; AVX-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
109 ; AVX-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
110 ; AVX-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
111 ; AVX-NEXT: [[C0:%.*]] = icmp ult i32 [[X0]], 0
112 ; AVX-NEXT: [[C1:%.*]] = icmp slt i32 [[X1]], 0
113 ; AVX-NEXT: [[C2:%.*]] = icmp sgt i32 [[X2]], 0
114 ; AVX-NEXT: [[C3:%.*]] = icmp slt i32 [[X3]], 0
115 ; AVX-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false
116 ; AVX-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
117 ; AVX-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false
118 ; AVX-NEXT: ret i1 [[S3]]
120 %x0 = extractelement <4 x i32> %x, i32 0
121 %x1 = extractelement <4 x i32> %x, i32 1
122 %x2 = extractelement <4 x i32> %x, i32 2
123 %x3 = extractelement <4 x i32> %x, i32 3
124 %c0 = icmp ult i32 %x0, 0
125 %c1 = icmp slt i32 %x1, 0
126 %c2 = icmp sgt i32 %x2, 0
127 %c3 = icmp slt i32 %x3, 0
128 %s1 = select i1 %c0, i1 %c1, i1 false
129 %s2 = select i1 %s1, i1 %c2, i1 false
130 %s3 = select i1 %s2, i1 %c3, i1 false
; Logical-and chain where every lane uses the same predicate (sgt) but a
; different constant operand (0, 1, 2, 3).
; Expected output: a single <4 x i32> compare against the constant vector
; <0, 1, 2, 3>, a freeze, and one llvm.vector.reduce.and.v4i1 call.
134 define i1 @logical_and_icmp_diff_const(<4 x i32> %x) {
135 ; CHECK-LABEL: @logical_and_icmp_diff_const(
136 ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], <i32 0, i32 1, i32 2, i32 3>
137 ; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
138 ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
139 ; CHECK-NEXT: ret i1 [[TMP3]]
141 %x0 = extractelement <4 x i32> %x, i32 0
142 %x1 = extractelement <4 x i32> %x, i32 1
143 %x2 = extractelement <4 x i32> %x, i32 2
144 %x3 = extractelement <4 x i32> %x, i32 3
145 %c0 = icmp sgt i32 %x0, 0
146 %c1 = icmp sgt i32 %x1, 1
147 %c2 = icmp sgt i32 %x2, 2
148 %c3 = icmp sgt i32 %x3, 3
149 %s1 = select i1 %c0, i1 %c1, i1 false
150 %s2 = select i1 %s1, i1 %c2, i1 false
151 %s3 = select i1 %s2, i1 %c3, i1 false
; The select chain mixes operations: %s1 is a logical-and, %s2 a logical-or,
; %s3 a logical-and again.
; Expected output: only the four `icmp sgt ..., 0` compares become one vector
; compare; the three selects stay scalar and read extracted lanes. No
; reduction intrinsic is expected.
155 define i1 @mixed_logical_icmp(<4 x i32> %x) {
156 ; CHECK-LABEL: @mixed_logical_icmp(
157 ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], zeroinitializer
158 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
159 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
160 ; CHECK-NEXT: [[S1:%.*]] = select i1 [[TMP2]], i1 [[TMP3]], i1 false
161 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
162 ; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 true, i1 [[TMP4]]
163 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
164 ; CHECK-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[TMP5]], i1 false
165 ; CHECK-NEXT: ret i1 [[S3]]
167 %x0 = extractelement <4 x i32> %x, i32 0
168 %x1 = extractelement <4 x i32> %x, i32 1
169 %x2 = extractelement <4 x i32> %x, i32 2
170 %x3 = extractelement <4 x i32> %x, i32 3
171 %c0 = icmp sgt i32 %x0, 0
172 %c1 = icmp sgt i32 %x1, 0
173 %c2 = icmp sgt i32 %x2, 0
174 %c3 = icmp sgt i32 %x3, 0
175 %s1 = select i1 %c0, i1 %c1, i1 false
176 %s2 = select i1 %s1, i1 true, i1 %c2
177 %s3 = select i1 %s2, i1 %c3, i1 false
; Logical-and chain over only three lanes (0, 1, 2) of the <4 x i32> input.
; Expected output: lanes 0 and 1 are compared as a <2 x i32> vector, lane 2 is
; compared as a scalar, and both selects stay scalar. No reduction intrinsic
; is expected.
181 define i1 @logical_and_icmp_subvec(<4 x i32> %x) {
182 ; CHECK-LABEL: @logical_and_icmp_subvec(
183 ; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2
184 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
185 ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> [[TMP1]], zeroinitializer
186 ; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[X2]], 0
187 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
188 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
189 ; CHECK-NEXT: [[S1:%.*]] = select i1 [[TMP3]], i1 [[TMP4]], i1 false
190 ; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
191 ; CHECK-NEXT: ret i1 [[S2]]
193 %x0 = extractelement <4 x i32> %x, i32 0
194 %x1 = extractelement <4 x i32> %x, i32 1
195 %x2 = extractelement <4 x i32> %x, i32 2
196 %c0 = icmp slt i32 %x0, 0
197 %c1 = icmp slt i32 %x1, 0
198 %c2 = icmp slt i32 %x2, 0
199 %s1 = select i1 %c0, i1 %c1, i1 false
200 %s2 = select i1 %s1, i1 %c2, i1 false
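204 ; TODO: This is better than all-scalar and still safe, but ideally this would be
205 ; either two 4-wide reductions joined by glue logic or one wide reduction.
206 ; NOTE(review): the assertions below already show one 8-wide reduce.and; confirm whether this TODO is still open.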
; Range check on every lane of %x: four `icmp slt ..., 42` compares followed by
; four `icmp sgt ..., 17` compares, all joined in one logical-and chain.
; Expected output: %x is widened to 8 lanes by repeating lanes 0..3, compared
; with both sgt and slt against <17 x4, 42 x4>, the two results are blended by
; a shufflevector, frozen, and reduced with llvm.vector.reduce.and.v8i1.
208 define i1 @logical_and_icmp_clamp(<4 x i32> %x) {
209 ; CHECK-LABEL: @logical_and_icmp_clamp(
210 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
211 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
212 ; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
213 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
214 ; CHECK-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]]
215 ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]])
216 ; CHECK-NEXT: ret i1 [[TMP6]]
218 %x0 = extractelement <4 x i32> %x, i32 0
219 %x1 = extractelement <4 x i32> %x, i32 1
220 %x2 = extractelement <4 x i32> %x, i32 2
221 %x3 = extractelement <4 x i32> %x, i32 3
222 %c0 = icmp slt i32 %x0, 42
223 %c1 = icmp slt i32 %x1, 42
224 %c2 = icmp slt i32 %x2, 42
225 %c3 = icmp slt i32 %x3, 42
226 %d0 = icmp sgt i32 %x0, 17
227 %d1 = icmp sgt i32 %x1, 17
228 %d2 = icmp sgt i32 %x2, 17
229 %d3 = icmp sgt i32 %x3, 17
230 %s1 = select i1 %c0, i1 %c1, i1 false
231 %s2 = select i1 %s1, i1 %c2, i1 false
232 %s3 = select i1 %s2, i1 %c3, i1 false
233 %s4 = select i1 %s3, i1 %d0, i1 false
234 %s5 = select i1 %s4, i1 %d1, i1 false
235 %s6 = select i1 %s5, i1 %d2, i1 false
236 %s7 = select i1 %s6, i1 %d3, i1 false
; Same range-check chain as @logical_and_icmp_clamp, except that the compare
; %c2 has an additional user: it is passed to @use1.
; Expected output: the same 8-wide blended compare and
; llvm.vector.reduce.and.v8i1 reduction, with lane 6 of the blended compare
; extracted to feed the @use1 call.
240 define i1 @logical_and_icmp_clamp_extra_use_cmp(<4 x i32> %x) {
241 ; CHECK-LABEL: @logical_and_icmp_clamp_extra_use_cmp(
242 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
243 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
244 ; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
245 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
246 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i32 6
247 ; CHECK-NEXT: call void @use1(i1 [[TMP5]])
248 ; CHECK-NEXT: [[TMP6:%.*]] = freeze <8 x i1> [[TMP4]]
249 ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP6]])
250 ; CHECK-NEXT: ret i1 [[TMP7]]
252 %x0 = extractelement <4 x i32> %x, i32 0
253 %x1 = extractelement <4 x i32> %x, i32 1
254 %x2 = extractelement <4 x i32> %x, i32 2
255 %x3 = extractelement <4 x i32> %x, i32 3
256 %c0 = icmp slt i32 %x0, 42
257 %c1 = icmp slt i32 %x1, 42
258 %c2 = icmp slt i32 %x2, 42
259 call void @use1(i1 %c2)
260 %c3 = icmp slt i32 %x3, 42
261 %d0 = icmp sgt i32 %x0, 17
262 %d1 = icmp sgt i32 %x1, 17
263 %d2 = icmp sgt i32 %x2, 17
264 %d3 = icmp sgt i32 %x3, 17
265 %s1 = select i1 %c0, i1 %c1, i1 false
266 %s2 = select i1 %s1, i1 %c2, i1 false
267 %s3 = select i1 %s2, i1 %c3, i1 false
268 %s4 = select i1 %s3, i1 %d0, i1 false
269 %s5 = select i1 %s4, i1 %d1, i1 false
270 %s6 = select i1 %s5, i1 %d2, i1 false
271 %s7 = select i1 %s6, i1 %d3, i1 false
; Same range-check chain as @logical_and_icmp_clamp, except that the
; intermediate select %s2 has an additional user: it is passed to @use1.
; Expected output: two separate 4-wide compares (slt 42 and sgt 17). Lanes 0..2
; of the slt compare are extracted to rebuild the scalar %s1/%s2 chain for the
; call; the sgt compare is frozen and reduced with llvm.vector.reduce.and.v4i1;
; the reduction, lane 3 of the slt compare, and %s2 are combined by selects.
275 define i1 @logical_and_icmp_clamp_extra_use_select(<4 x i32> %x) {
276 ; CHECK-LABEL: @logical_and_icmp_clamp_extra_use_select(
277 ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42>
278 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17>
279 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
280 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
281 ; CHECK-NEXT: [[S1:%.*]] = select i1 [[TMP3]], i1 [[TMP4]], i1 false
282 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
283 ; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[TMP5]], i1 false
284 ; CHECK-NEXT: call void @use1(i1 [[S2]])
285 ; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP2]]
286 ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]])
287 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
288 ; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP7]], i1 [[TMP8]], i1 false
289 ; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[S2]], i1 [[OP_RDX]], i1 false
290 ; CHECK-NEXT: ret i1 [[OP_RDX1]]
292 %x0 = extractelement <4 x i32> %x, i32 0
293 %x1 = extractelement <4 x i32> %x, i32 1
294 %x2 = extractelement <4 x i32> %x, i32 2
295 %x3 = extractelement <4 x i32> %x, i32 3
296 %c0 = icmp slt i32 %x0, 42
297 %c1 = icmp slt i32 %x1, 42
298 %c2 = icmp slt i32 %x2, 42
299 %c3 = icmp slt i32 %x3, 42
300 %d0 = icmp sgt i32 %x0, 17
301 %d1 = icmp sgt i32 %x1, 17
302 %d2 = icmp sgt i32 %x2, 17
303 %d3 = icmp sgt i32 %x3, 17
304 %s1 = select i1 %c0, i1 %c1, i1 false
305 %s2 = select i1 %s1, i1 %c2, i1 false
306 call void @use1(i1 %s2)
307 %s3 = select i1 %s2, i1 %c3, i1 false
308 %s4 = select i1 %s3, i1 %d0, i1 false
309 %s5 = select i1 %s4, i1 %d1, i1 false
310 %s6 = select i1 %s5, i1 %d2, i1 false
311 %s7 = select i1 %s6, i1 %d3, i1 false
; Logical-and chain over lanes 0..3 of two <8 x i32> inputs: four
; `icmp slt x, 42` compares followed by four `icmp slt x, y` compares. Lanes
; 4..7 of %x and %y are never read.
; Expected output: one 8-wide `icmp slt` whose operands are built by
; shufflevector (lanes 0..3 of %x repeated twice; the constant 42 in lanes 0..3
; and lanes 0..3 of %y in lanes 4..7), a freeze, and one
; llvm.vector.reduce.and.v8i1 call.
315 define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) {
316 ; CHECK-LABEL: @logical_and_icmp_clamp_v8i32(
317 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
318 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[Y:%.*]], <8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 poison, i32 poison, i32 poison, i32 poison>, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
319 ; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], [[TMP2]]
320 ; CHECK-NEXT: [[TMP4:%.*]] = freeze <8 x i1> [[TMP3]]
321 ; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP4]])
322 ; CHECK-NEXT: ret i1 [[TMP5]]
324 %x0 = extractelement <8 x i32> %x, i32 0
325 %x1 = extractelement <8 x i32> %x, i32 1
326 %x2 = extractelement <8 x i32> %x, i32 2
327 %x3 = extractelement <8 x i32> %x, i32 3
328 %y0 = extractelement <8 x i32> %y, i32 0
329 %y1 = extractelement <8 x i32> %y, i32 1
330 %y2 = extractelement <8 x i32> %y, i32 2
331 %y3 = extractelement <8 x i32> %y, i32 3
332 %c0 = icmp slt i32 %x0, 42
333 %c1 = icmp slt i32 %x1, 42
334 %c2 = icmp slt i32 %x2, 42
335 %c3 = icmp slt i32 %x3, 42
336 %d0 = icmp slt i32 %x0, %y0
337 %d1 = icmp slt i32 %x1, %y1
338 %d2 = icmp slt i32 %x2, %y2
339 %d3 = icmp slt i32 %x3, %y3
340 %s1 = select i1 %c0, i1 %c1, i1 false
341 %s2 = select i1 %s1, i1 %c2, i1 false
342 %s3 = select i1 %s2, i1 %c3, i1 false
343 %s4 = select i1 %s3, i1 %d0, i1 false
344 %s5 = select i1 %s4, i1 %d1, i1 false
345 %s6 = select i1 %s5, i1 %d2, i1 false
346 %s7 = select i1 %s6, i1 %d3, i1 false
; Range-check chain with an incomplete first group: only three
; `icmp slt ..., 42` compares (there is no %c3 and no %s3) followed by four
; `icmp sgt ..., 17` compares.
; Expected output: lanes 0 and 1 of the slt group are compared as a <2 x i32>
; vector and lane 2 as a scalar; the sgt group is compared 4-wide, frozen and
; reduced with llvm.vector.reduce.and.v4i1; the pieces are then combined with
; scalar selects, two of which take a frozen condition.
350 define i1 @logical_and_icmp_clamp_partial(<4 x i32> %x) {
351 ; CHECK-LABEL: @logical_and_icmp_clamp_partial(
352 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2
353 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
354 ; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP2]], <i32 42, i32 42>
355 ; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[TMP1]], 42
356 ; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17>
357 ; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
358 ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]])
359 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
360 ; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP7]], i1 [[TMP6]], i1 false
361 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
362 ; CHECK-NEXT: [[TMP9:%.*]] = freeze i1 [[TMP8]]
363 ; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP9]], i1 [[C2]], i1 false
364 ; CHECK-NEXT: [[TMP10:%.*]] = freeze i1 [[OP_RDX]]
365 ; CHECK-NEXT: [[OP_RDX2:%.*]] = select i1 [[TMP10]], i1 [[OP_RDX1]], i1 false
366 ; CHECK-NEXT: ret i1 [[OP_RDX2]]
368 %x0 = extractelement <4 x i32> %x, i32 0
369 %x1 = extractelement <4 x i32> %x, i32 1
370 %x2 = extractelement <4 x i32> %x, i32 2
371 %x3 = extractelement <4 x i32> %x, i32 3
372 %c0 = icmp slt i32 %x0, 42
373 %c1 = icmp slt i32 %x1, 42
374 %c2 = icmp slt i32 %x2, 42
375 ; remove an element from the previous test
376 %d0 = icmp sgt i32 %x0, 17
377 %d1 = icmp sgt i32 %x1, 17
378 %d2 = icmp sgt i32 %x2, 17
379 %d3 = icmp sgt i32 %x3, 17
380 %s1 = select i1 %c0, i1 %c1, i1 false
381 %s2 = select i1 %s1, i1 %c2, i1 false
382 ; remove an element from the previous test
383 %s4 = select i1 %s2, i1 %d0, i1 false
384 %s5 = select i1 %s4, i1 %d1, i1 false
385 %s6 = select i1 %s5, i1 %d2, i1 false
386 %s7 = select i1 %s6, i1 %d3, i1 false
; Range-check chain in which one compare of the first group uses a different
; predicate: %c3 is `icmp ult` while %c0..%c2 are `icmp slt`.
; The two RUN configurations are expected to differ:
;  - SSE prefix: two 4-wide groups. The slt and ult compares against 42 are
;    blended (lane 3 taken from ult); that group and the sgt-17 group are each
;    frozen and reduced with llvm.vector.reduce.and.v4i1, then joined by a
;    select.
;  - AVX prefix: a single 8-wide group. Operands are assembled with
;    shufflevector, compared with sgt and ult, blended (lane 7 taken from ult),
;    frozen and reduced with llvm.vector.reduce.and.v8i1.
390 define i1 @logical_and_icmp_clamp_pred_diff(<4 x i32> %x) {
391 ; SSE-LABEL: @logical_and_icmp_clamp_pred_diff(
392 ; SSE-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42>
393 ; SSE-NEXT: [[TMP2:%.*]] = icmp ult <4 x i32> [[X]], <i32 42, i32 42, i32 42, i32 42>
394 ; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
395 ; SSE-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17>
396 ; SSE-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
397 ; SSE-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]])
398 ; SSE-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP3]]
399 ; SSE-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP7]])
400 ; SSE-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP6]], i1 [[TMP8]], i1 false
401 ; SSE-NEXT: ret i1 [[OP_RDX]]
403 ; AVX-LABEL: @logical_and_icmp_clamp_pred_diff(
404 ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
405 ; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 42, i32 42, i32 42, i32 poison>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 3>
406 ; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> <i32 17, i32 17, i32 17, i32 17, i32 poison, i32 poison, i32 poison, i32 42>, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 15>
407 ; AVX-NEXT: [[TMP4:%.*]] = icmp sgt <8 x i32> [[TMP2]], [[TMP3]]
408 ; AVX-NEXT: [[TMP5:%.*]] = icmp ult <8 x i32> [[TMP2]], [[TMP3]]
409 ; AVX-NEXT: [[TMP6:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
410 ; AVX-NEXT: [[TMP7:%.*]] = freeze <8 x i1> [[TMP6]]
411 ; AVX-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP7]])
412 ; AVX-NEXT: ret i1 [[TMP8]]
414 %x0 = extractelement <4 x i32> %x, i32 0
415 %x1 = extractelement <4 x i32> %x, i32 1
416 %x2 = extractelement <4 x i32> %x, i32 2
417 %x3 = extractelement <4 x i32> %x, i32 3
418 %c0 = icmp slt i32 %x0, 42
419 %c1 = icmp slt i32 %x1, 42
420 %c2 = icmp slt i32 %x2, 42
421 %c3 = icmp ult i32 %x3, 42 ; predicate changed
422 %d0 = icmp sgt i32 %x0, 17
423 %d1 = icmp sgt i32 %x1, 17
424 %d2 = icmp sgt i32 %x2, 17
425 %d3 = icmp sgt i32 %x3, 17
426 %s1 = select i1 %c0, i1 %c1, i1 false
427 %s2 = select i1 %s1, i1 %c2, i1 false
428 %s3 = select i1 %s2, i1 %c3, i1 false
429 %s4 = select i1 %s3, i1 %d0, i1 false
430 %s5 = select i1 %s4, i1 %d1, i1 false
431 %s6 = select i1 %s5, i1 %d2, i1 false
432 %s7 = select i1 %s6, i1 %d3, i1 false
; Logical-and chain that starts from a non-compare value: %s3 is
; `select %c, %c, false` on the scalar argument %c, followed by four
; `icmp slt x, y` lane compares.
; Expected output: %s3 is kept as a scalar select; the four compares become one
; <4 x i32> compare that is frozen and reduced with
; llvm.vector.reduce.and.v4i1; a final select uses %s3 as its condition and the
; reduction as its true value.
436 define i1 @logical_and_icmp_extra_op(<4 x i32> %x, <4 x i32> %y, i1 %c) {
437 ; CHECK-LABEL: @logical_and_icmp_extra_op(
438 ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]]
439 ; CHECK-NEXT: [[S3:%.*]] = select i1 [[C:%.*]], i1 [[C]], i1 false
440 ; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
441 ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
442 ; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[S3]], i1 [[TMP3]], i1 false
443 ; CHECK-NEXT: ret i1 [[OP_RDX]]
445 %x0 = extractelement <4 x i32> %x, i32 0
446 %x1 = extractelement <4 x i32> %x, i32 1
447 %x2 = extractelement <4 x i32> %x, i32 2
448 %x3 = extractelement <4 x i32> %x, i32 3
449 %y0 = extractelement <4 x i32> %y, i32 0
450 %y1 = extractelement <4 x i32> %y, i32 1
451 %y2 = extractelement <4 x i32> %y, i32 2
452 %y3 = extractelement <4 x i32> %y, i32 3
453 %d0 = icmp slt i32 %x0, %y0
454 %d1 = icmp slt i32 %x1, %y1
455 %d2 = icmp slt i32 %x2, %y2
456 %d3 = icmp slt i32 %x3, %y3
457 %s3 = select i1 %c, i1 %c, i1 false
458 %s4 = select i1 %s3, i1 %d0, i1 false
459 %s5 = select i1 %s4, i1 %d1, i1 false
460 %s6 = select i1 %s5, i1 %d2, i1 false
461 %s7 = select i1 %s6, i1 %d3, i1 false
; Logical-or counterpart of @logical_and_icmp_extra_op: %s3 is
; `select %c, true, %c` on the scalar argument %c, followed by four
; `icmp slt x, y` lane compares joined by select a, true, b.
; Expected output: %s3 is kept as a scalar select; the four compares become one
; <4 x i32> compare that is frozen and reduced with
; llvm.vector.reduce.or.v4i1; a final select uses %s3 as its condition and the
; reduction as its false value.
465 define i1 @logical_or_icmp_extra_op(<4 x i32> %x, <4 x i32> %y, i1 %c) {
466 ; CHECK-LABEL: @logical_or_icmp_extra_op(
467 ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]]
468 ; CHECK-NEXT: [[S3:%.*]] = select i1 [[C:%.*]], i1 true, i1 [[C]]
469 ; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
470 ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
471 ; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[S3]], i1 true, i1 [[TMP3]]
472 ; CHECK-NEXT: ret i1 [[OP_RDX]]
474 %x0 = extractelement <4 x i32> %x, i32 0
475 %x1 = extractelement <4 x i32> %x, i32 1
476 %x2 = extractelement <4 x i32> %x, i32 2
477 %x3 = extractelement <4 x i32> %x, i32 3
478 %y0 = extractelement <4 x i32> %y, i32 0
479 %y1 = extractelement <4 x i32> %y, i32 1
480 %y2 = extractelement <4 x i32> %y, i32 2
481 %y3 = extractelement <4 x i32> %y, i32 3
482 %d0 = icmp slt i32 %x0, %y0
483 %d1 = icmp slt i32 %x1, %y1
484 %d2 = icmp slt i32 %x2, %y2
485 %d3 = icmp slt i32 %x3, %y3
486 %s3 = select i1 %c, i1 true, i1 %c
487 %s4 = select i1 %s3, i1 true, i1 %d0
488 %s5 = select i1 %s4, i1 true, i1 %d1
489 %s6 = select i1 %s5, i1 true, i1 %d2
490 %s7 = select i1 %s6, i1 true, i1 %d3
; Logical-and chain that interleaves four `icmp sgt ..., 17` lane compares
; (%d0..%d3) with three scalar i1 arguments (%c0, %c1, %c2).
; Expected output: the four compares become one <4 x i32> compare that is
; frozen and reduced with llvm.vector.reduce.and.v4i1; %c0, %c1 and %c2 stay
; scalar and are folded in by selects, with a freeze applied to %c1 and to the
; intermediate select result before each is used as a select condition.
494 define i1 @logical_and_icmp_extra_args(<4 x i32> %x, i1 %c0, i1 %c1, i1 %c2) {
495 ; CHECK-LABEL: @logical_and_icmp_extra_args(
496 ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], <i32 17, i32 17, i32 17, i32 17>
497 ; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
498 ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
499 ; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP3]], i1 [[C0:%.*]], i1 false
500 ; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[C1:%.*]]
501 ; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP4]], i1 [[C2:%.*]], i1 false
502 ; CHECK-NEXT: [[TMP5:%.*]] = freeze i1 [[OP_RDX]]
503 ; CHECK-NEXT: [[OP_RDX2:%.*]] = select i1 [[TMP5]], i1 [[OP_RDX1]], i1 false
504 ; CHECK-NEXT: ret i1 [[OP_RDX2]]
506 %x0 = extractelement <4 x i32> %x, i32 0
507 %x1 = extractelement <4 x i32> %x, i32 1
508 %x2 = extractelement <4 x i32> %x, i32 2
509 %x3 = extractelement <4 x i32> %x, i32 3
510 %d0 = icmp sgt i32 %x0, 17
511 %d1 = icmp sgt i32 %x1, 17
512 %d2 = icmp sgt i32 %x2, 17
513 %d3 = icmp sgt i32 %x3, 17
514 %s1 = select i1 %d0, i1 %c0, i1 false ; <- d0, d1, d2, d3 gets reduced.
515 %s2 = select i1 %s1, i1 %c1, i1 false ; <- c0, c1, c2 remain scalar.
516 %s3 = select i1 %s2, i1 %c2, i1 false
517 %s5 = select i1 %s3, i1 %d1, i1 false
518 %s6 = select i1 %s5, i1 %d2, i1 false
519 %s7 = select i1 %s6, i1 %d3, i1 false