1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -passes=slp-vectorizer -mtriple=x86_64-- -S | FileCheck %s --check-prefixes=CHECK,SSE
3 ; RUN: opt < %s -passes=slp-vectorizer -mtriple=x86_64-- -mattr=avx512vl -S | FileCheck %s --check-prefixes=CHECK,AVX
; Logical-and chain (select %a, %b, false) over four "slt 0" compares of the
; elements extracted from %x. The assertions below expect one vector icmp, a
; freeze of the <4 x i1> result and a single llvm.vector.reduce.and call.
7 define i1 @logical_and_icmp(<4 x i32> %x) {
8 ; CHECK-LABEL: @logical_and_icmp(
9 ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], zeroinitializer
10 ; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
11 ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
12 ; CHECK-NEXT: ret i1 [[TMP3]]
14 %x0 = extractelement <4 x i32> %x, i32 0
15 %x1 = extractelement <4 x i32> %x, i32 1
16 %x2 = extractelement <4 x i32> %x, i32 2
17 %x3 = extractelement <4 x i32> %x, i32 3
18 %c0 = icmp slt i32 %x0, 0
19 %c1 = icmp slt i32 %x1, 0
20 %c2 = icmp slt i32 %x2, 0
21 %c3 = icmp slt i32 %x3, 0
22 %s1 = select i1 %c0, i1 %c1, i1 false
23 %s2 = select i1 %s1, i1 %c2, i1 false
24 %s3 = select i1 %s2, i1 %c3, i1 false
; Logical-or chain (select %a, true, %b) over four element-wise "slt" compares
; of %x against %y. The assertions below expect one vector icmp, a freeze of
; the <4 x i1> result and a single llvm.vector.reduce.or call.
28 define i1 @logical_or_icmp(<4 x i32> %x, <4 x i32> %y) {
29 ; CHECK-LABEL: @logical_or_icmp(
30 ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]]
31 ; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
32 ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
33 ; CHECK-NEXT: ret i1 [[TMP3]]
35 %x0 = extractelement <4 x i32> %x, i32 0
36 %x1 = extractelement <4 x i32> %x, i32 1
37 %x2 = extractelement <4 x i32> %x, i32 2
38 %x3 = extractelement <4 x i32> %x, i32 3
39 %y0 = extractelement <4 x i32> %y, i32 0
40 %y1 = extractelement <4 x i32> %y, i32 1
41 %y2 = extractelement <4 x i32> %y, i32 2
42 %y3 = extractelement <4 x i32> %y, i32 3
43 %c0 = icmp slt i32 %x0, %y0
44 %c1 = icmp slt i32 %x1, %y1
45 %c2 = icmp slt i32 %x2, %y2
46 %c3 = icmp slt i32 %x3, %y3
47 %s1 = select i1 %c0, i1 true, i1 %c1
48 %s2 = select i1 %s1, i1 true, i1 %c2
49 %s3 = select i1 %s2, i1 true, i1 %c3
; Floating-point variant of the logical-and chain: four "fcmp olt 0.0"
; compares of the elements of %x joined by (select %a, %b, false). The
; assertions below expect a vector fcmp, a freeze and llvm.vector.reduce.and.
53 define i1 @logical_and_fcmp(<4 x float> %x) {
54 ; CHECK-LABEL: @logical_and_fcmp(
55 ; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x float> [[X:%.*]], zeroinitializer
56 ; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
57 ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
58 ; CHECK-NEXT: ret i1 [[TMP3]]
60 %x0 = extractelement <4 x float> %x, i32 0
61 %x1 = extractelement <4 x float> %x, i32 1
62 %x2 = extractelement <4 x float> %x, i32 2
63 %x3 = extractelement <4 x float> %x, i32 3
64 %c0 = fcmp olt float %x0, 0.0
65 %c1 = fcmp olt float %x1, 0.0
66 %c2 = fcmp olt float %x2, 0.0
67 %c3 = fcmp olt float %x3, 0.0
68 %s1 = select i1 %c0, i1 %c1, i1 false
69 %s2 = select i1 %s1, i1 %c2, i1 false
70 %s3 = select i1 %s2, i1 %c3, i1 false
; Floating-point variant of the logical-or chain: four "fcmp olt 0.0"
; compares of the elements of %x joined by (select %a, true, %b). The
; assertions below expect a vector fcmp, a freeze and llvm.vector.reduce.or.
74 define i1 @logical_or_fcmp(<4 x float> %x) {
75 ; CHECK-LABEL: @logical_or_fcmp(
76 ; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x float> [[X:%.*]], zeroinitializer
77 ; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
78 ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
79 ; CHECK-NEXT: ret i1 [[TMP3]]
81 %x0 = extractelement <4 x float> %x, i32 0
82 %x1 = extractelement <4 x float> %x, i32 1
83 %x2 = extractelement <4 x float> %x, i32 2
84 %x3 = extractelement <4 x float> %x, i32 3
85 %c0 = fcmp olt float %x0, 0.0
86 %c1 = fcmp olt float %x1, 0.0
87 %c2 = fcmp olt float %x2, 0.0
88 %c3 = fcmp olt float %x3, 0.0
89 %s1 = select i1 %c0, i1 true, i1 %c1
90 %s2 = select i1 %s1, i1 true, i1 %c2
91 %s3 = select i1 %s2, i1 true, i1 %c3
; Logical-and chain whose four compares against 0 use different predicates
; (ult, slt, sgt, slt). Expected output differs between the two RUN lines:
; the run without -mattr expects two vector compares (slt and ult) on shuffled
; operands, blended with a shufflevector and then reduced, while the
; avx512vl run expects the scalar code to be left unchanged.
95 define i1 @logical_and_icmp_diff_preds(<4 x i32> %x) {
96 ; SSE-LABEL: @logical_and_icmp_diff_preds(
97 ; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, <4 x i32> <i32 1, i32 3, i32 6, i32 0>
98 ; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
99 ; SSE-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[TMP1]], [[TMP2]]
100 ; SSE-NEXT: [[TMP4:%.*]] = icmp ult <4 x i32> [[TMP1]], [[TMP2]]
101 ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
102 ; SSE-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
103 ; SSE-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]])
104 ; SSE-NEXT: ret i1 [[TMP7]]
106 ; AVX-LABEL: @logical_and_icmp_diff_preds(
107 ; AVX-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
108 ; AVX-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
109 ; AVX-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
110 ; AVX-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
111 ; AVX-NEXT: [[C0:%.*]] = icmp ult i32 [[X0]], 0
112 ; AVX-NEXT: [[C1:%.*]] = icmp slt i32 [[X1]], 0
113 ; AVX-NEXT: [[C2:%.*]] = icmp sgt i32 [[X2]], 0
114 ; AVX-NEXT: [[C3:%.*]] = icmp slt i32 [[X3]], 0
115 ; AVX-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false
116 ; AVX-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
117 ; AVX-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false
118 ; AVX-NEXT: ret i1 [[S3]]
120 %x0 = extractelement <4 x i32> %x, i32 0
121 %x1 = extractelement <4 x i32> %x, i32 1
122 %x2 = extractelement <4 x i32> %x, i32 2
123 %x3 = extractelement <4 x i32> %x, i32 3
124 %c0 = icmp ult i32 %x0, 0
125 %c1 = icmp slt i32 %x1, 0
126 %c2 = icmp sgt i32 %x2, 0
127 %c3 = icmp slt i32 %x3, 0
128 %s1 = select i1 %c0, i1 %c1, i1 false
129 %s2 = select i1 %s1, i1 %c2, i1 false
130 %s3 = select i1 %s2, i1 %c3, i1 false
; Logical-and chain where every compare uses the same predicate (sgt) but a
; different constant (0, 1, 2, 3). The assertions below expect one vector
; icmp against the constant vector <0, 1, 2, 3>, a freeze and a reduction.
134 define i1 @logical_and_icmp_diff_const(<4 x i32> %x) {
135 ; CHECK-LABEL: @logical_and_icmp_diff_const(
136 ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], <i32 0, i32 1, i32 2, i32 3>
137 ; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
138 ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
139 ; CHECK-NEXT: ret i1 [[TMP3]]
141 %x0 = extractelement <4 x i32> %x, i32 0
142 %x1 = extractelement <4 x i32> %x, i32 1
143 %x2 = extractelement <4 x i32> %x, i32 2
144 %x3 = extractelement <4 x i32> %x, i32 3
145 %c0 = icmp sgt i32 %x0, 0
146 %c1 = icmp sgt i32 %x1, 1
147 %c2 = icmp sgt i32 %x2, 2
148 %c3 = icmp sgt i32 %x3, 3
149 %s1 = select i1 %c0, i1 %c1, i1 false
150 %s2 = select i1 %s1, i1 %c2, i1 false
151 %s3 = select i1 %s2, i1 %c3, i1 false
; The select chain mixes logical-and (%s1, %s3) with logical-or (%s2), so it
; is not a single reduction. The assertions below expect only the four "sgt 0"
; compares to become a vector icmp; the select chain stays scalar and reads
; its operands through extractelement.
155 define i1 @mixed_logical_icmp(<4 x i32> %x) {
156 ; CHECK-LABEL: @mixed_logical_icmp(
157 ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], zeroinitializer
158 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
159 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
160 ; CHECK-NEXT: [[S1:%.*]] = select i1 [[TMP2]], i1 [[TMP3]], i1 false
161 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
162 ; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 true, i1 [[TMP4]]
163 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
164 ; CHECK-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[TMP5]], i1 false
165 ; CHECK-NEXT: ret i1 [[S3]]
167 %x0 = extractelement <4 x i32> %x, i32 0
168 %x1 = extractelement <4 x i32> %x, i32 1
169 %x2 = extractelement <4 x i32> %x, i32 2
170 %x3 = extractelement <4 x i32> %x, i32 3
171 %c0 = icmp sgt i32 %x0, 0
172 %c1 = icmp sgt i32 %x1, 0
173 %c2 = icmp sgt i32 %x2, 0
174 %c3 = icmp sgt i32 %x3, 0
175 %s1 = select i1 %c0, i1 %c1, i1 false
176 %s2 = select i1 %s1, i1 true, i1 %c2
177 %s3 = select i1 %s2, i1 %c3, i1 false
; Logical-and chain over only three of the four elements of %x. The
; assertions below expect a 2-wide vector icmp for elements 0 and 1, a scalar
; icmp for element 2, and the select chain kept scalar (no reduction call).
181 define i1 @logical_and_icmp_subvec(<4 x i32> %x) {
182 ; CHECK-LABEL: @logical_and_icmp_subvec(
183 ; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2
184 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
185 ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> [[TMP1]], zeroinitializer
186 ; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[X2]], 0
187 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
188 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
189 ; CHECK-NEXT: [[S1:%.*]] = select i1 [[TMP3]], i1 [[TMP4]], i1 false
190 ; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
191 ; CHECK-NEXT: ret i1 [[S2]]
193 %x0 = extractelement <4 x i32> %x, i32 0
194 %x1 = extractelement <4 x i32> %x, i32 1
195 %x2 = extractelement <4 x i32> %x, i32 2
196 %c0 = icmp slt i32 %x0, 0
197 %c1 = icmp slt i32 %x1, 0
198 %c2 = icmp slt i32 %x2, 0
199 %s1 = select i1 %c0, i1 %c1, i1 false
200 %s2 = select i1 %s1, i1 %c2, i1 false
204 ; TODO: This is better than all-scalar and still safe,
205 ; but we want this to be 2 reductions with glue
206 ; logic...or a wide reduction?
; Range ("clamp") test: every element of %x is compared both "slt 42" and
; "sgt 17", and all eight results are joined by one logical-and chain. The
; assertions below expect %x to be duplicated into an <8 x i32>, compared
; with both predicates, blended lane-wise by a shufflevector, frozen and
; reduced with a single 8-wide llvm.vector.reduce.and.
208 define i1 @logical_and_icmp_clamp(<4 x i32> %x) {
209 ; CHECK-LABEL: @logical_and_icmp_clamp(
210 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
211 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
212 ; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
213 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
214 ; CHECK-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]]
215 ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]])
216 ; CHECK-NEXT: ret i1 [[TMP6]]
218 %x0 = extractelement <4 x i32> %x, i32 0
219 %x1 = extractelement <4 x i32> %x, i32 1
220 %x2 = extractelement <4 x i32> %x, i32 2
221 %x3 = extractelement <4 x i32> %x, i32 3
222 %c0 = icmp slt i32 %x0, 42
223 %c1 = icmp slt i32 %x1, 42
224 %c2 = icmp slt i32 %x2, 42
225 %c3 = icmp slt i32 %x3, 42
226 %d0 = icmp sgt i32 %x0, 17
227 %d1 = icmp sgt i32 %x1, 17
228 %d2 = icmp sgt i32 %x2, 17
229 %d3 = icmp sgt i32 %x3, 17
230 %s1 = select i1 %c0, i1 %c1, i1 false
231 %s2 = select i1 %s1, i1 %c2, i1 false
232 %s3 = select i1 %s2, i1 %c3, i1 false
233 %s4 = select i1 %s3, i1 %d0, i1 false
234 %s5 = select i1 %s4, i1 %d1, i1 false
235 %s6 = select i1 %s5, i1 %d2, i1 false
236 %s7 = select i1 %s6, i1 %d3, i1 false
; Same clamp pattern as @logical_and_icmp_clamp, but the compare %c2 has an
; additional use as the argument of @use1. The assertions below still expect
; the 8-wide reduction; the extra use is expected to read lane 6 of the
; blended compare vector through an extractelement.
240 define i1 @logical_and_icmp_clamp_extra_use_cmp(<4 x i32> %x) {
241 ; CHECK-LABEL: @logical_and_icmp_clamp_extra_use_cmp(
242 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
243 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
244 ; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
245 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
246 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i32 6
247 ; CHECK-NEXT: call void @use1(i1 [[TMP5]])
248 ; CHECK-NEXT: [[TMP6:%.*]] = freeze <8 x i1> [[TMP4]]
249 ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP6]])
250 ; CHECK-NEXT: ret i1 [[TMP7]]
252 %x0 = extractelement <4 x i32> %x, i32 0
253 %x1 = extractelement <4 x i32> %x, i32 1
254 %x2 = extractelement <4 x i32> %x, i32 2
255 %x3 = extractelement <4 x i32> %x, i32 3
256 %c0 = icmp slt i32 %x0, 42
257 %c1 = icmp slt i32 %x1, 42
258 %c2 = icmp slt i32 %x2, 42
259 call void @use1(i1 %c2)
260 %c3 = icmp slt i32 %x3, 42
261 %d0 = icmp sgt i32 %x0, 17
262 %d1 = icmp sgt i32 %x1, 17
263 %d2 = icmp sgt i32 %x2, 17
264 %d3 = icmp sgt i32 %x3, 17
265 %s1 = select i1 %c0, i1 %c1, i1 false
266 %s2 = select i1 %s1, i1 %c2, i1 false
267 %s3 = select i1 %s2, i1 %c3, i1 false
268 %s4 = select i1 %s3, i1 %d0, i1 false
269 %s5 = select i1 %s4, i1 %d1, i1 false
270 %s6 = select i1 %s5, i1 %d2, i1 false
271 %s7 = select i1 %s6, i1 %d3, i1 false
; Same clamp pattern as @logical_and_icmp_clamp, but the intermediate select
; %s2 has an additional use as the argument of @use1. The assertions below
; expect two 4-wide compares; %s1/%s2 are kept as scalar selects on extracted
; lanes, only the "sgt 17" vector is reduced, and the result is combined with
; lane 3 of the "slt 42" vector and with %s2 by scalar selects.
275 define i1 @logical_and_icmp_clamp_extra_use_select(<4 x i32> %x) {
276 ; CHECK-LABEL: @logical_and_icmp_clamp_extra_use_select(
277 ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], splat (i32 42)
278 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]], splat (i32 17)
279 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
280 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
281 ; CHECK-NEXT: [[S1:%.*]] = select i1 [[TMP3]], i1 [[TMP4]], i1 false
282 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
283 ; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[TMP5]], i1 false
284 ; CHECK-NEXT: call void @use1(i1 [[S2]])
285 ; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP2]]
286 ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]])
287 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
288 ; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP7]], i1 [[TMP8]], i1 false
289 ; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[S2]], i1 [[OP_RDX]], i1 false
290 ; CHECK-NEXT: ret i1 [[OP_RDX1]]
292 %x0 = extractelement <4 x i32> %x, i32 0
293 %x1 = extractelement <4 x i32> %x, i32 1
294 %x2 = extractelement <4 x i32> %x, i32 2
295 %x3 = extractelement <4 x i32> %x, i32 3
296 %c0 = icmp slt i32 %x0, 42
297 %c1 = icmp slt i32 %x1, 42
298 %c2 = icmp slt i32 %x2, 42
299 %c3 = icmp slt i32 %x3, 42
300 %d0 = icmp sgt i32 %x0, 17
301 %d1 = icmp sgt i32 %x1, 17
302 %d2 = icmp sgt i32 %x2, 17
303 %d3 = icmp sgt i32 %x3, 17
304 %s1 = select i1 %c0, i1 %c1, i1 false
305 %s2 = select i1 %s1, i1 %c2, i1 false
306 call void @use1(i1 %s2)
307 %s3 = select i1 %s2, i1 %c3, i1 false
308 %s4 = select i1 %s3, i1 %d0, i1 false
309 %s5 = select i1 %s4, i1 %d1, i1 false
310 %s6 = select i1 %s5, i1 %d2, i1 false
311 %s7 = select i1 %s6, i1 %d3, i1 false
; Clamp-style chain on <8 x i32> inputs where only elements 0-3 are used:
; each %x element is compared "slt 42" and "slt" against the matching %y
; element. The assertions below expect a single 8-wide icmp slt whose right
; operand holds 42 in lanes 0-3 and the low four elements of %y in lanes 4-7
; (built with llvm.vector.insert), followed by a freeze and a reduction.
315 define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) {
316 ; CHECK-LABEL: @logical_and_icmp_clamp_v8i32(
317 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
318 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[Y:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
319 ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 poison, i32 poison, i32 poison, i32 poison>, <4 x i32> [[TMP2]], i64 4)
320 ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <8 x i32> [[TMP1]], [[TMP3]]
321 ; CHECK-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]]
322 ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]])
323 ; CHECK-NEXT: ret i1 [[TMP6]]
325 %x0 = extractelement <8 x i32> %x, i32 0
326 %x1 = extractelement <8 x i32> %x, i32 1
327 %x2 = extractelement <8 x i32> %x, i32 2
328 %x3 = extractelement <8 x i32> %x, i32 3
329 %y0 = extractelement <8 x i32> %y, i32 0
330 %y1 = extractelement <8 x i32> %y, i32 1
331 %y2 = extractelement <8 x i32> %y, i32 2
332 %y3 = extractelement <8 x i32> %y, i32 3
333 %c0 = icmp slt i32 %x0, 42
334 %c1 = icmp slt i32 %x1, 42
335 %c2 = icmp slt i32 %x2, 42
336 %c3 = icmp slt i32 %x3, 42
337 %d0 = icmp slt i32 %x0, %y0
338 %d1 = icmp slt i32 %x1, %y1
339 %d2 = icmp slt i32 %x2, %y2
340 %d3 = icmp slt i32 %x3, %y3
341 %s1 = select i1 %c0, i1 %c1, i1 false
342 %s2 = select i1 %s1, i1 %c2, i1 false
343 %s3 = select i1 %s2, i1 %c3, i1 false
344 %s4 = select i1 %s3, i1 %d0, i1 false
345 %s5 = select i1 %s4, i1 %d1, i1 false
346 %s6 = select i1 %s5, i1 %d2, i1 false
347 %s7 = select i1 %s6, i1 %d3, i1 false
; Clamp pattern with an incomplete "slt 42" group: only elements 0-2 are
; compared against 42 (there is no %c3 / %s3), while all four elements are
; compared "sgt 17". The assertions below expect a 4-wide reduction of the
; "sgt 17" compares, a 2-wide vector icmp plus one scalar icmp for the
; "slt 42" compares, and scalar selects (with freezes) joining the pieces.
351 define i1 @logical_and_icmp_clamp_partial(<4 x i32> %x) {
352 ; CHECK-LABEL: @logical_and_icmp_clamp_partial(
353 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2
354 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
355 ; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP2]], splat (i32 42)
356 ; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[TMP1]], 42
357 ; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], splat (i32 17)
358 ; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
359 ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]])
360 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
361 ; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP7]], i1 [[TMP6]], i1 false
362 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
363 ; CHECK-NEXT: [[TMP9:%.*]] = freeze i1 [[TMP8]]
364 ; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP9]], i1 [[C2]], i1 false
365 ; CHECK-NEXT: [[TMP10:%.*]] = freeze i1 [[OP_RDX]]
366 ; CHECK-NEXT: [[OP_RDX2:%.*]] = select i1 [[TMP10]], i1 [[OP_RDX1]], i1 false
367 ; CHECK-NEXT: ret i1 [[OP_RDX2]]
369 %x0 = extractelement <4 x i32> %x, i32 0
370 %x1 = extractelement <4 x i32> %x, i32 1
371 %x2 = extractelement <4 x i32> %x, i32 2
372 %x3 = extractelement <4 x i32> %x, i32 3
373 %c0 = icmp slt i32 %x0, 42
374 %c1 = icmp slt i32 %x1, 42
375 %c2 = icmp slt i32 %x2, 42
376 ; remove an element from the previous test
377 %d0 = icmp sgt i32 %x0, 17
378 %d1 = icmp sgt i32 %x1, 17
379 %d2 = icmp sgt i32 %x2, 17
380 %d3 = icmp sgt i32 %x3, 17
381 %s1 = select i1 %c0, i1 %c1, i1 false
382 %s2 = select i1 %s1, i1 %c2, i1 false
383 ; remove an element from the previous test
384 %s4 = select i1 %s2, i1 %d0, i1 false
385 %s5 = select i1 %s4, i1 %d1, i1 false
386 %s6 = select i1 %s5, i1 %d2, i1 false
387 %s7 = select i1 %s6, i1 %d3, i1 false
; Clamp pattern in which one compare (%c3) uses "ult" instead of "slt". The
; assertions below expect 8-wide "sgt" and "ult" compares on shuffled
; operands, blended so that only lane 7 comes from the "ult" result, then a
; freeze and a single 8-wide llvm.vector.reduce.and.
391 define i1 @logical_and_icmp_clamp_pred_diff(<4 x i32> %x) {
392 ; CHECK-LABEL: @logical_and_icmp_clamp_pred_diff(
393 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
394 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 42, i32 42, i32 42, i32 poison>, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 13, i32 14, i32 3>
395 ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP2]], <4 x i32> [[X]], i64 0)
396 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> <i32 17, i32 17, i32 17, i32 17, i32 poison, i32 poison, i32 poison, i32 42>, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 15>
397 ; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <8 x i32> [[TMP3]], [[TMP4]]
398 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <8 x i32> [[TMP3]], [[TMP4]]
399 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i1> [[TMP5]], <8 x i1> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
400 ; CHECK-NEXT: [[TMP8:%.*]] = freeze <8 x i1> [[TMP7]]
401 ; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP8]])
402 ; CHECK-NEXT: ret i1 [[TMP9]]
404 %x0 = extractelement <4 x i32> %x, i32 0
405 %x1 = extractelement <4 x i32> %x, i32 1
406 %x2 = extractelement <4 x i32> %x, i32 2
407 %x3 = extractelement <4 x i32> %x, i32 3
408 %c0 = icmp slt i32 %x0, 42
409 %c1 = icmp slt i32 %x1, 42
410 %c2 = icmp slt i32 %x2, 42
411 %c3 = icmp ult i32 %x3, 42 ; predicate changed
412 %d0 = icmp sgt i32 %x0, 17
413 %d1 = icmp sgt i32 %x1, 17
414 %d2 = icmp sgt i32 %x2, 17
415 %d3 = icmp sgt i32 %x3, 17
416 %s1 = select i1 %c0, i1 %c1, i1 false
417 %s2 = select i1 %s1, i1 %c2, i1 false
418 %s3 = select i1 %s2, i1 %c3, i1 false
419 %s4 = select i1 %s3, i1 %d0, i1 false
420 %s5 = select i1 %s4, i1 %d1, i1 false
421 %s6 = select i1 %s5, i1 %d2, i1 false
422 %s7 = select i1 %s6, i1 %d3, i1 false
; Logical-and chain that starts with a select on the i1 argument %c and then
; folds in four element-wise "slt" compares of %x against %y. The assertions
; below expect the four compares to be reduced as a vector and the result to
; be combined with %c by one scalar logical-and select.
426 define i1 @logical_and_icmp_extra_op(<4 x i32> %x, <4 x i32> %y, i1 %c) {
427 ; CHECK-LABEL: @logical_and_icmp_extra_op(
428 ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]]
429 ; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
430 ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
431 ; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP3]], i1 [[C:%.*]], i1 false
432 ; CHECK-NEXT: ret i1 [[OP_RDX]]
434 %x0 = extractelement <4 x i32> %x, i32 0
435 %x1 = extractelement <4 x i32> %x, i32 1
436 %x2 = extractelement <4 x i32> %x, i32 2
437 %x3 = extractelement <4 x i32> %x, i32 3
438 %y0 = extractelement <4 x i32> %y, i32 0
439 %y1 = extractelement <4 x i32> %y, i32 1
440 %y2 = extractelement <4 x i32> %y, i32 2
441 %y3 = extractelement <4 x i32> %y, i32 3
442 %d0 = icmp slt i32 %x0, %y0
443 %d1 = icmp slt i32 %x1, %y1
444 %d2 = icmp slt i32 %x2, %y2
445 %d3 = icmp slt i32 %x3, %y3
446 %s3 = select i1 %c, i1 %c, i1 false
447 %s4 = select i1 %s3, i1 %d0, i1 false
448 %s5 = select i1 %s4, i1 %d1, i1 false
449 %s6 = select i1 %s5, i1 %d2, i1 false
450 %s7 = select i1 %s6, i1 %d3, i1 false
; Logical-or counterpart of @logical_and_icmp_extra_op: the chain starts with
; a select on the i1 argument %c and then folds in four element-wise "slt"
; compares of %x against %y. The assertions below expect a vector
; llvm.vector.reduce.or combined with %c by one scalar logical-or select.
454 define i1 @logical_or_icmp_extra_op(<4 x i32> %x, <4 x i32> %y, i1 %c) {
455 ; CHECK-LABEL: @logical_or_icmp_extra_op(
456 ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]]
457 ; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
458 ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
459 ; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP3]], i1 true, i1 [[C:%.*]]
460 ; CHECK-NEXT: ret i1 [[OP_RDX]]
462 %x0 = extractelement <4 x i32> %x, i32 0
463 %x1 = extractelement <4 x i32> %x, i32 1
464 %x2 = extractelement <4 x i32> %x, i32 2
465 %x3 = extractelement <4 x i32> %x, i32 3
466 %y0 = extractelement <4 x i32> %y, i32 0
467 %y1 = extractelement <4 x i32> %y, i32 1
468 %y2 = extractelement <4 x i32> %y, i32 2
469 %y3 = extractelement <4 x i32> %y, i32 3
470 %d0 = icmp slt i32 %x0, %y0
471 %d1 = icmp slt i32 %x1, %y1
472 %d2 = icmp slt i32 %x2, %y2
473 %d3 = icmp slt i32 %x3, %y3
474 %s3 = select i1 %c, i1 true, i1 %c
475 %s4 = select i1 %s3, i1 true, i1 %d0
476 %s5 = select i1 %s4, i1 true, i1 %d1
477 %s6 = select i1 %s5, i1 true, i1 %d2
478 %s7 = select i1 %s6, i1 true, i1 %d3
; Logical-and chain in which the four "sgt 17" compares of %x are interleaved
; with three plain i1 arguments (%c0, %c1, %c2). The assertions below expect
; the compares to be reduced as a vector, with the i1 arguments folded in
; afterwards by scalar logical-and selects (two of them guarded by a freeze).
482 define i1 @logical_and_icmp_extra_args(<4 x i32> %x, i1 %c0, i1 %c1, i1 %c2) {
483 ; CHECK-LABEL: @logical_and_icmp_extra_args(
484 ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], splat (i32 17)
485 ; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
486 ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
487 ; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP3]], i1 [[C0:%.*]], i1 false
488 ; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[C1:%.*]]
489 ; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP4]], i1 [[C2:%.*]], i1 false
490 ; CHECK-NEXT: [[TMP5:%.*]] = freeze i1 [[OP_RDX]]
491 ; CHECK-NEXT: [[OP_RDX2:%.*]] = select i1 [[TMP5]], i1 [[OP_RDX1]], i1 false
492 ; CHECK-NEXT: ret i1 [[OP_RDX2]]
494 %x0 = extractelement <4 x i32> %x, i32 0
495 %x1 = extractelement <4 x i32> %x, i32 1
496 %x2 = extractelement <4 x i32> %x, i32 2
497 %x3 = extractelement <4 x i32> %x, i32 3
498 %d0 = icmp sgt i32 %x0, 17
499 %d1 = icmp sgt i32 %x1, 17
500 %d2 = icmp sgt i32 %x2, 17
501 %d3 = icmp sgt i32 %x3, 17
502 %s1 = select i1 %d0, i1 %c0, i1 false ; <- d0, d1, d2, d3 get reduced.
503 %s2 = select i1 %s1, i1 %c1, i1 false ; <- c0, c1, c2 remain scalar.
504 %s3 = select i1 %s2, i1 %c2, i1 false
505 %s5 = select i1 %s3, i1 %d1, i1 false
506 %s6 = select i1 %s5, i1 %d2, i1 false
507 %s7 = select i1 %s6, i1 %d3, i1 false