1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -slp-vectorizer -S %s | FileCheck %s
3 ; RUN: opt -aa-pipeline=basic-aa -passes='slp-vectorizer' -S %s | FileCheck %s
5 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
6 target triple = "arm64-apple-ios5.0.0"
; Test input: eight adjacent i16 elements starting at %ptr. Each lane is
; loaded, compared with `icmp ult` against the constant 16383, and the select
; keeps the loaded value when it is smaller and 16383 otherwise (an unsigned
; min with a constant); the result is stored back to the same address.
; All lanes use the same predicate, and the CHECK lines expect one
; <8 x i16> load / icmp ult / select / store sequence.
; %x is not referenced by the visible body.
8 define void @select_umin_8xi16(i16* %ptr, i16 %x) {
9 ; CHECK-LABEL: @select_umin_8xi16(
11 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
12 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
13 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
14 ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
15 ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
16 ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
17 ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
18 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
19 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
20 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
21 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
22 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
23 ; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
24 ; CHECK-NEXT: ret void
27 %l.0 = load i16, i16* %ptr
28 %cmp.0 = icmp ult i16 %l.0, 16383
29 %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
30 store i16 %s.0, i16* %ptr, align 2
32 %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
33 %l.1 = load i16, i16* %gep.1
34 %cmp.1 = icmp ult i16 %l.1, 16383
35 %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
36 store i16 %s.1, i16* %gep.1, align 2
38 %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
39 %l.2 = load i16, i16* %gep.2
40 %cmp.2 = icmp ult i16 %l.2, 16383
41 %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
42 store i16 %s.2, i16* %gep.2, align 2
44 %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
45 %l.3 = load i16, i16* %gep.3
46 %cmp.3 = icmp ult i16 %l.3, 16383
47 %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
48 store i16 %s.3, i16* %gep.3, align 2
50 %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
51 %l.4 = load i16, i16* %gep.4
52 %cmp.4 = icmp ult i16 %l.4, 16383
53 %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
54 store i16 %s.4, i16* %gep.4, align 2
56 %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
57 %l.5 = load i16, i16* %gep.5
58 %cmp.5 = icmp ult i16 %l.5, 16383
59 %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
60 store i16 %s.5, i16* %gep.5, align 2
62 %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
63 %l.6 = load i16, i16* %gep.6
64 %cmp.6 = icmp ult i16 %l.6, 16383
65 %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
66 store i16 %s.6, i16* %gep.6, align 2
68 %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
69 %l.7 = load i16, i16* %gep.7
70 %cmp.7 = icmp ult i16 %l.7, 16383
71 %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
72 store i16 %s.7, i16* %gep.7, align 2
; Test input: four adjacent i32 elements starting at %ptr. Each lane is
; loaded, compared with `icmp ult` against 16383, and the select keeps the
; smaller of the loaded value and 16383 (unsigned min with a constant) before
; storing it back in place.
; The CHECK lines expect one <4 x i32> load / icmp ult / select / store.
; %x is not referenced by the visible body.
76 define void @select_umin_4xi32(i32* %ptr, i32 %x) {
77 ; CHECK-LABEL: @select_umin_4xi32(
79 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
80 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
81 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
82 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
83 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
84 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
85 ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
86 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
87 ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
88 ; CHECK-NEXT: ret void
91 %l.0 = load i32, i32* %ptr
92 %cmp.0 = icmp ult i32 %l.0, 16383
93 %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
94 store i32 %s.0, i32* %ptr, align 4
96 %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
97 %l.1 = load i32, i32* %gep.1
98 %cmp.1 = icmp ult i32 %l.1, 16383
99 %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
100 store i32 %s.1, i32* %gep.1, align 4
102 %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
103 %l.2 = load i32, i32* %gep.2
104 %cmp.2 = icmp ult i32 %l.2, 16383
105 %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
106 store i32 %s.2, i32* %gep.2, align 4
108 %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
109 %l.3 = load i32, i32* %gep.3
110 %cmp.3 = icmp ult i32 %l.3, 16383
111 %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
112 store i32 %s.3, i32* %gep.3, align 4
; Negative test: four adjacent i32 lanes whose compare predicates are NOT
; uniform. Lanes 0 and 2 use `icmp ult`, lanes 1 and 3 use `icmp ugt`, all
; against the constant 16383.
; The CHECK lines expect the scalar load / icmp / select / store sequences to
; be left as they are (no vector instructions appear in the expected output).
; NOTE(review): the function name says "ule", but the predicates actually used
; below are ult and ugt.
; %x is not referenced by the visible body.
117 define void @select_ule_ugt_mix_4xi32(i32* %ptr, i32 %x) {
118 ; CHECK-LABEL: @select_ule_ugt_mix_4xi32(
120 ; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
121 ; CHECK-NEXT: [[CMP_0:%.*]] = icmp ult i32 [[L_0]], 16383
122 ; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
123 ; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4
124 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
125 ; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
126 ; CHECK-NEXT: [[CMP_1:%.*]] = icmp ugt i32 [[L_1]], 16383
127 ; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
128 ; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4
129 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
130 ; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
131 ; CHECK-NEXT: [[CMP_2:%.*]] = icmp ult i32 [[L_2]], 16383
132 ; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
133 ; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4
134 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
135 ; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
136 ; CHECK-NEXT: [[CMP_3:%.*]] = icmp ugt i32 [[L_3]], 16383
137 ; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
138 ; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4
139 ; CHECK-NEXT: ret void
142 %l.0 = load i32, i32* %ptr
143 %cmp.0 = icmp ult i32 %l.0, 16383
144 %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
145 store i32 %s.0, i32* %ptr, align 4
147 %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
148 %l.1 = load i32, i32* %gep.1
149 %cmp.1 = icmp ugt i32 %l.1, 16383
150 %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
151 store i32 %s.1, i32* %gep.1, align 4
153 %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
154 %l.2 = load i32, i32* %gep.2
155 %cmp.2 = icmp ult i32 %l.2, 16383
156 %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
157 store i32 %s.2, i32* %gep.2, align 4
159 %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
160 %l.3 = load i32, i32* %gep.3
161 %cmp.3 = icmp ugt i32 %l.3, 16383
162 %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
163 store i32 %s.3, i32* %gep.3, align 4
168 ; There is no <2 x i64> version of umin, but we can efficiently lower
169 ; compare/select pairs with uniform predicates.
; Test input: two adjacent i64 elements starting at %ptr, each clamped with
; `icmp ult` + select against 16383 (unsigned min with a constant) and stored
; back in place.
; The CHECK lines expect one <2 x i64> load / icmp ult / select / store.
; The scalar loads carry no explicit alignment and the scalar stores use
; align 4; the expected vector load is align 8 and the vector store align 4.
; %x is not referenced by the visible body.
170 define void @select_umin_2xi64(i64* %ptr, i64 %x) {
171 ; CHECK-LABEL: @select_umin_2xi64(
173 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
174 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
175 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
176 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i64> [[TMP1]], <i64 16383, i64 16383>
177 ; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
178 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
179 ; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
180 ; CHECK-NEXT: ret void
183 %l.0 = load i64, i64* %ptr
184 %cmp.0 = icmp ult i64 %l.0, 16383
185 %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
186 store i64 %s.0, i64* %ptr, align 4
188 %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
189 %l.1 = load i64, i64* %gep.1
190 %cmp.1 = icmp ult i64 %l.1, 16383
191 %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
192 store i64 %s.1, i64* %gep.1, align 4
; Test input: eight adjacent i16 elements starting at %ptr. Same shape as the
; `icmp ult` variant, but every lane uses the non-strict predicate `icmp ule`
; against 16383; the select keeps the loaded value when the compare is true
; and 16383 otherwise, then stores the result back in place.
; The CHECK lines expect one <8 x i16> load / icmp ule / select / store.
; %x is not referenced by the visible body.
198 define void @select_umin_ule_8xi16(i16* %ptr, i16 %x) {
199 ; CHECK-LABEL: @select_umin_ule_8xi16(
201 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
202 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
203 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
204 ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
205 ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
206 ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
207 ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
208 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
209 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
210 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
211 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
212 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
213 ; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
214 ; CHECK-NEXT: ret void
217 %l.0 = load i16, i16* %ptr
218 %cmp.0 = icmp ule i16 %l.0, 16383
219 %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
220 store i16 %s.0, i16* %ptr, align 2
222 %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
223 %l.1 = load i16, i16* %gep.1
224 %cmp.1 = icmp ule i16 %l.1, 16383
225 %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
226 store i16 %s.1, i16* %gep.1, align 2
228 %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
229 %l.2 = load i16, i16* %gep.2
230 %cmp.2 = icmp ule i16 %l.2, 16383
231 %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
232 store i16 %s.2, i16* %gep.2, align 2
234 %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
235 %l.3 = load i16, i16* %gep.3
236 %cmp.3 = icmp ule i16 %l.3, 16383
237 %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
238 store i16 %s.3, i16* %gep.3, align 2
240 %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
241 %l.4 = load i16, i16* %gep.4
242 %cmp.4 = icmp ule i16 %l.4, 16383
243 %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
244 store i16 %s.4, i16* %gep.4, align 2
246 %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
247 %l.5 = load i16, i16* %gep.5
248 %cmp.5 = icmp ule i16 %l.5, 16383
249 %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
250 store i16 %s.5, i16* %gep.5, align 2
252 %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
253 %l.6 = load i16, i16* %gep.6
254 %cmp.6 = icmp ule i16 %l.6, 16383
255 %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
256 store i16 %s.6, i16* %gep.6, align 2
258 %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
259 %l.7 = load i16, i16* %gep.7
260 %cmp.7 = icmp ule i16 %l.7, 16383
261 %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
262 store i16 %s.7, i16* %gep.7, align 2
; Test input: four adjacent i32 elements starting at %ptr, each compared with
; the non-strict predicate `icmp ule` against 16383; the select keeps the
; loaded value when the compare is true and 16383 otherwise, then stores the
; result back in place.
; The CHECK lines expect one <4 x i32> load / icmp ule / select / store.
; %x is not referenced by the visible body.
266 define void @select_umin_ule_4xi32(i32* %ptr, i32 %x) {
267 ; CHECK-LABEL: @select_umin_ule_4xi32(
269 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
270 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
271 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
272 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
273 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
274 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
275 ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
276 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
277 ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
278 ; CHECK-NEXT: ret void
281 %l.0 = load i32, i32* %ptr
282 %cmp.0 = icmp ule i32 %l.0, 16383
283 %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
284 store i32 %s.0, i32* %ptr, align 4
286 %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
287 %l.1 = load i32, i32* %gep.1
288 %cmp.1 = icmp ule i32 %l.1, 16383
289 %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
290 store i32 %s.1, i32* %gep.1, align 4
292 %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
293 %l.2 = load i32, i32* %gep.2
294 %cmp.2 = icmp ule i32 %l.2, 16383
295 %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
296 store i32 %s.2, i32* %gep.2, align 4
298 %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
299 %l.3 = load i32, i32* %gep.3
300 %cmp.3 = icmp ule i32 %l.3, 16383
301 %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
302 store i32 %s.3, i32* %gep.3, align 4
307 ; There is no <2 x i64> version of umin, but we can efficiently lower
308 ; compare/select pairs with uniform predicates.
; Test input: two adjacent i64 elements starting at %ptr, each compared with
; the non-strict predicate `icmp ule` against 16383; the select keeps the
; loaded value when the compare is true and 16383 otherwise, then stores the
; result back in place.
; The CHECK lines expect one <2 x i64> load / icmp ule / select / store.
; The scalar loads carry no explicit alignment and the scalar stores use
; align 4; the expected vector load is align 8 and the vector store align 4.
; %x is not referenced by the visible body.
309 define void @select_umin_ule_2xi64(i64* %ptr, i64 %x) {
310 ; CHECK-LABEL: @select_umin_ule_2xi64(
312 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
313 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
314 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
315 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[TMP1]], <i64 16383, i64 16383>
316 ; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
317 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
318 ; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
319 ; CHECK-NEXT: ret void
322 %l.0 = load i64, i64* %ptr
323 %cmp.0 = icmp ule i64 %l.0, 16383
324 %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
325 store i64 %s.0, i64* %ptr, align 4
327 %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
328 %l.1 = load i64, i64* %gep.1
329 %cmp.1 = icmp ule i64 %l.1, 16383
330 %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
331 store i64 %s.1, i64* %gep.1, align 4
; Test input: eight adjacent i16 elements starting at %ptr. Each lane is
; loaded, compared with the signed predicate `icmp slt` against 16383, and the
; select keeps the loaded value when it is smaller and 16383 otherwise (a
; signed min with a constant); the result is stored back in place.
; The CHECK lines expect one <8 x i16> load / icmp slt / select / store.
; %x is not referenced by the visible body.
336 define void @select_smin_8xi16(i16* %ptr, i16 %x) {
337 ; CHECK-LABEL: @select_smin_8xi16(
339 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
340 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
341 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
342 ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
343 ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
344 ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
345 ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
346 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
347 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
348 ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
349 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
350 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
351 ; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
352 ; CHECK-NEXT: ret void
355 %l.0 = load i16, i16* %ptr
356 %cmp.0 = icmp slt i16 %l.0, 16383
357 %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
358 store i16 %s.0, i16* %ptr, align 2
360 %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
361 %l.1 = load i16, i16* %gep.1
362 %cmp.1 = icmp slt i16 %l.1, 16383
363 %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
364 store i16 %s.1, i16* %gep.1, align 2
366 %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
367 %l.2 = load i16, i16* %gep.2
368 %cmp.2 = icmp slt i16 %l.2, 16383
369 %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
370 store i16 %s.2, i16* %gep.2, align 2
372 %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
373 %l.3 = load i16, i16* %gep.3
374 %cmp.3 = icmp slt i16 %l.3, 16383
375 %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
376 store i16 %s.3, i16* %gep.3, align 2
378 %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
379 %l.4 = load i16, i16* %gep.4
380 %cmp.4 = icmp slt i16 %l.4, 16383
381 %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
382 store i16 %s.4, i16* %gep.4, align 2
384 %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
385 %l.5 = load i16, i16* %gep.5
386 %cmp.5 = icmp slt i16 %l.5, 16383
387 %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
388 store i16 %s.5, i16* %gep.5, align 2
390 %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
391 %l.6 = load i16, i16* %gep.6
392 %cmp.6 = icmp slt i16 %l.6, 16383
393 %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
394 store i16 %s.6, i16* %gep.6, align 2
396 %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
397 %l.7 = load i16, i16* %gep.7
398 %cmp.7 = icmp slt i16 %l.7, 16383
399 %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
400 store i16 %s.7, i16* %gep.7, align 2
; Test input: four adjacent i32 elements starting at %ptr. Each lane is
; loaded, compared with the signed predicate `icmp slt` against 16383, and the
; select keeps the smaller of the loaded value and 16383 (signed min with a
; constant) before storing it back in place.
; The CHECK lines expect one <4 x i32> load / icmp slt / select / store.
; %x is not referenced by the visible body.
404 define void @select_smin_4xi32(i32* %ptr, i32 %x) {
405 ; CHECK-LABEL: @select_smin_4xi32(
407 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
408 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
409 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
410 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
411 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
412 ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
413 ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
414 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
415 ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
416 ; CHECK-NEXT: ret void
419 %l.0 = load i32, i32* %ptr
420 %cmp.0 = icmp slt i32 %l.0, 16383
421 %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
422 store i32 %s.0, i32* %ptr, align 4
424 %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
425 %l.1 = load i32, i32* %gep.1
426 %cmp.1 = icmp slt i32 %l.1, 16383
427 %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
428 store i32 %s.1, i32* %gep.1, align 4
430 %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
431 %l.2 = load i32, i32* %gep.2
432 %cmp.2 = icmp slt i32 %l.2, 16383
433 %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
434 store i32 %s.2, i32* %gep.2, align 4
436 %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
437 %l.3 = load i32, i32* %gep.3
438 %cmp.3 = icmp slt i32 %l.3, 16383
439 %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
440 store i32 %s.3, i32* %gep.3, align 4
445 ; There is no <2 x i64> version of smin, but we can efficiently lower
446 ; compare/select pairs with uniform predicates.
; Test input: two adjacent i64 elements starting at %ptr, each clamped with
; the signed predicate `icmp slt` + select against 16383 (signed min with a
; constant) and stored back in place.
; The CHECK lines expect one <2 x i64> load / icmp slt / select / store.
; The scalar loads carry no explicit alignment and the scalar stores use
; align 4; the expected vector load is align 8 and the vector store align 4.
; %x is not referenced by the visible body.
447 define void @select_smin_2xi64(i64* %ptr, i64 %x) {
448 ; CHECK-LABEL: @select_smin_2xi64(
450 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
451 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
452 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
453 ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i64> [[TMP1]], <i64 16383, i64 16383>
454 ; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
455 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
456 ; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
457 ; CHECK-NEXT: ret void
460 %l.0 = load i64, i64* %ptr
461 %cmp.0 = icmp slt i64 %l.0, 16383
462 %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
463 store i64 %s.0, i64* %ptr, align 4
465 %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
466 %l.1 = load i64, i64* %gep.1
467 %cmp.1 = icmp slt i64 %l.1, 16383
468 %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
469 store i64 %s.1, i64* %gep.1, align 4
; Test input: eight adjacent i16 elements starting at %ptr. Same shape as the
; `icmp slt` variant, but every lane uses the non-strict signed predicate
; `icmp sle` against 16383; the select keeps the loaded value when the compare
; is true and 16383 otherwise, then stores the result back in place.
; The CHECK lines expect one <8 x i16> load / icmp sle / select / store.
; %x is not referenced by the visible body.
474 define void @select_smin_sle_8xi16(i16* %ptr, i16 %x) {
475 ; CHECK-LABEL: @select_smin_sle_8xi16(
477 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
478 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
479 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
480 ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
481 ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
482 ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
483 ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
484 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
485 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
486 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
487 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
488 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
489 ; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
490 ; CHECK-NEXT: ret void
493 %l.0 = load i16, i16* %ptr
494 %cmp.0 = icmp sle i16 %l.0, 16383
495 %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
496 store i16 %s.0, i16* %ptr, align 2
498 %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
499 %l.1 = load i16, i16* %gep.1
500 %cmp.1 = icmp sle i16 %l.1, 16383
501 %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
502 store i16 %s.1, i16* %gep.1, align 2
504 %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
505 %l.2 = load i16, i16* %gep.2
506 %cmp.2 = icmp sle i16 %l.2, 16383
507 %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
508 store i16 %s.2, i16* %gep.2, align 2
510 %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
511 %l.3 = load i16, i16* %gep.3
512 %cmp.3 = icmp sle i16 %l.3, 16383
513 %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
514 store i16 %s.3, i16* %gep.3, align 2
516 %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
517 %l.4 = load i16, i16* %gep.4
518 %cmp.4 = icmp sle i16 %l.4, 16383
519 %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
520 store i16 %s.4, i16* %gep.4, align 2
522 %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
523 %l.5 = load i16, i16* %gep.5
524 %cmp.5 = icmp sle i16 %l.5, 16383
525 %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
526 store i16 %s.5, i16* %gep.5, align 2
528 %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
529 %l.6 = load i16, i16* %gep.6
530 %cmp.6 = icmp sle i16 %l.6, 16383
531 %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
532 store i16 %s.6, i16* %gep.6, align 2
534 %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
535 %l.7 = load i16, i16* %gep.7
536 %cmp.7 = icmp sle i16 %l.7, 16383
537 %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
538 store i16 %s.7, i16* %gep.7, align 2
; Test input: four adjacent i32 elements starting at %ptr, each compared with
; the non-strict signed predicate `icmp sle` against 16383; the select keeps
; the loaded value when the compare is true and 16383 otherwise, then stores
; the result back in place.
; The CHECK lines expect one <4 x i32> load / icmp sle / select / store.
; %x is not referenced by the visible body.
542 define void @select_smin_sle_4xi32(i32* %ptr, i32 %x) {
543 ; CHECK-LABEL: @select_smin_sle_4xi32(
545 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
546 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
547 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
548 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
549 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
550 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
551 ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
552 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
553 ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
554 ; CHECK-NEXT: ret void
557 %l.0 = load i32, i32* %ptr
558 %cmp.0 = icmp sle i32 %l.0, 16383
559 %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
560 store i32 %s.0, i32* %ptr, align 4
562 %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
563 %l.1 = load i32, i32* %gep.1
564 %cmp.1 = icmp sle i32 %l.1, 16383
565 %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
566 store i32 %s.1, i32* %gep.1, align 4
568 %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
569 %l.2 = load i32, i32* %gep.2
570 %cmp.2 = icmp sle i32 %l.2, 16383
571 %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
572 store i32 %s.2, i32* %gep.2, align 4
574 %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
575 %l.3 = load i32, i32* %gep.3
576 %cmp.3 = icmp sle i32 %l.3, 16383
577 %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
578 store i32 %s.3, i32* %gep.3, align 4
583 ; There is no <2 x i64> version of smin, but we can efficiently lower
584 ; compare/select pairs with uniform predicates.
; Test input: two adjacent i64 elements starting at %ptr, each compared with
; the non-strict signed predicate `icmp sle` against 16383; the select keeps
; the loaded value when the compare is true and 16383 otherwise, then stores
; the result back in place.
; The CHECK lines expect one <2 x i64> load / icmp sle / select / store.
; The scalar loads carry no explicit alignment and the scalar stores use
; align 4; the expected vector load is align 8 and the vector store align 4.
; %x is not referenced by the visible body.
585 define void @select_smin_sle_2xi64(i64* %ptr, i64 %x) {
586 ; CHECK-LABEL: @select_smin_sle_2xi64(
588 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
589 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
590 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
591 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <2 x i64> [[TMP1]], <i64 16383, i64 16383>
592 ; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
593 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
594 ; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
595 ; CHECK-NEXT: ret void
598 %l.0 = load i64, i64* %ptr
599 %cmp.0 = icmp sle i64 %l.0, 16383
600 %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
601 store i64 %s.0, i64* %ptr, align 4
603 %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
604 %l.1 = load i64, i64* %gep.1
605 %cmp.1 = icmp sle i64 %l.1, 16383
606 %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
607 store i64 %s.1, i64* %gep.1, align 4
; Test input: eight adjacent i16 elements starting at %ptr. Each lane is
; loaded, compared with `icmp ugt` against 16383, and the select keeps the
; loaded value when it is larger and 16383 otherwise (an unsigned max with a
; constant); the result is stored back in place.
; The CHECK lines expect one <8 x i16> load / icmp ugt / select / store.
; %x is not referenced by the visible body.
611 define void @select_umax_8xi16(i16* %ptr, i16 %x) {
612 ; CHECK-LABEL: @select_umax_8xi16(
614 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
615 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
616 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
617 ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
618 ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
619 ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
620 ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
621 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
622 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
623 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
624 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
625 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
626 ; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
627 ; CHECK-NEXT: ret void
630 %l.0 = load i16, i16* %ptr
631 %cmp.0 = icmp ugt i16 %l.0, 16383
632 %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
633 store i16 %s.0, i16* %ptr, align 2
635 %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
636 %l.1 = load i16, i16* %gep.1
637 %cmp.1 = icmp ugt i16 %l.1, 16383
638 %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
639 store i16 %s.1, i16* %gep.1, align 2
641 %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
642 %l.2 = load i16, i16* %gep.2
643 %cmp.2 = icmp ugt i16 %l.2, 16383
644 %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
645 store i16 %s.2, i16* %gep.2, align 2
647 %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
648 %l.3 = load i16, i16* %gep.3
649 %cmp.3 = icmp ugt i16 %l.3, 16383
650 %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
651 store i16 %s.3, i16* %gep.3, align 2
653 %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
654 %l.4 = load i16, i16* %gep.4
655 %cmp.4 = icmp ugt i16 %l.4, 16383
656 %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
657 store i16 %s.4, i16* %gep.4, align 2
659 %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
660 %l.5 = load i16, i16* %gep.5
661 %cmp.5 = icmp ugt i16 %l.5, 16383
662 %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
663 store i16 %s.5, i16* %gep.5, align 2
665 %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
666 %l.6 = load i16, i16* %gep.6
667 %cmp.6 = icmp ugt i16 %l.6, 16383
668 %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
669 store i16 %s.6, i16* %gep.6, align 2
671 %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
672 %l.7 = load i16, i16* %gep.7
673 %cmp.7 = icmp ugt i16 %l.7, 16383
674 %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
675 store i16 %s.7, i16* %gep.7, align 2
; Test input: four adjacent i32 elements starting at %ptr. Each lane is
; loaded, compared with `icmp ugt` against 16383, and the select keeps the
; larger of the loaded value and 16383 (unsigned max with a constant) before
; storing it back in place.
; The CHECK lines expect one <4 x i32> load / icmp ugt / select / store.
; %x is not referenced by the visible body.
679 define void @select_umax_4xi32(i32* %ptr, i32 %x) {
680 ; CHECK-LABEL: @select_umax_4xi32(
682 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
683 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
684 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
685 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
686 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
687 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
688 ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
689 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
690 ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
691 ; CHECK-NEXT: ret void
694 %l.0 = load i32, i32* %ptr
695 %cmp.0 = icmp ugt i32 %l.0, 16383
696 %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
697 store i32 %s.0, i32* %ptr, align 4
699 %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
700 %l.1 = load i32, i32* %gep.1
701 %cmp.1 = icmp ugt i32 %l.1, 16383
702 %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
703 store i32 %s.1, i32* %gep.1, align 4
705 %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
706 %l.2 = load i32, i32* %gep.2
707 %cmp.2 = icmp ugt i32 %l.2, 16383
708 %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
709 store i32 %s.2, i32* %gep.2, align 4
711 %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
712 %l.3 = load i32, i32* %gep.3
713 %cmp.3 = icmp ugt i32 %l.3, 16383
714 %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
715 store i32 %s.3, i32* %gep.3, align 4
720 ; There is no <2 x i64> version of umax, but we can efficiently lower
721 ; compare/select pairs with uniform predicates.
; Test input: two adjacent i64 elements starting at %ptr, each clamped with
; `icmp ugt` + select against 16383 (unsigned max with a constant) and stored
; back in place.
; The CHECK lines expect one <2 x i64> load / icmp ugt / select / store.
; The scalar loads carry no explicit alignment and the scalar stores use
; align 4; the expected vector load is align 8 and the vector store align 4.
; %x is not referenced by the visible body.
722 define void @select_umax_2xi64(i64* %ptr, i64 %x) {
723 ; CHECK-LABEL: @select_umax_2xi64(
725 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
726 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
727 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
728 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i64> [[TMP1]], <i64 16383, i64 16383>
729 ; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
730 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
731 ; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
732 ; CHECK-NEXT: ret void
735 %l.0 = load i64, i64* %ptr
736 %cmp.0 = icmp ugt i64 %l.0, 16383
737 %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
738 store i64 %s.0, i64* %ptr, align 4
740 %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
741 %l.1 = load i64, i64* %gep.1
742 %cmp.1 = icmp ugt i64 %l.1, 16383
743 %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
744 store i64 %s.1, i64* %gep.1, align 4
; Unsigned-max pattern written with a non-strict compare (icmp uge + select
; against the constant 16383) applied to eight adjacent i16 elements starting
; at %ptr, each result stored back in place. The CHECK lines expect the
; scalar GEPs to remain and the loads/compares/selects/stores to become a
; single <8 x i16> load/icmp uge/select/store, all with `align 2`.
; NOTE(review): %x is not referenced by any instruction visible here.
749 define void @select_umax_uge_8xi16(i16* %ptr, i16 %x) {
750 ; CHECK-LABEL: @select_umax_uge_8xi16(
752 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
753 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
754 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
755 ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
756 ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
757 ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
758 ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
759 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
760 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
761 ; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
762 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
763 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
764 ; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
765 ; CHECK-NEXT: ret void
768 %l.0 = load i16, i16* %ptr
769 %cmp.0 = icmp uge i16 %l.0, 16383
770 %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
771 store i16 %s.0, i16* %ptr, align 2
773 %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
774 %l.1 = load i16, i16* %gep.1
775 %cmp.1 = icmp uge i16 %l.1, 16383
776 %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
777 store i16 %s.1, i16* %gep.1, align 2
779 %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
780 %l.2 = load i16, i16* %gep.2
781 %cmp.2 = icmp uge i16 %l.2, 16383
782 %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
783 store i16 %s.2, i16* %gep.2, align 2
785 %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
786 %l.3 = load i16, i16* %gep.3
787 %cmp.3 = icmp uge i16 %l.3, 16383
788 %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
789 store i16 %s.3, i16* %gep.3, align 2
791 %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
792 %l.4 = load i16, i16* %gep.4
793 %cmp.4 = icmp uge i16 %l.4, 16383
794 %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
795 store i16 %s.4, i16* %gep.4, align 2
797 %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
798 %l.5 = load i16, i16* %gep.5
799 %cmp.5 = icmp uge i16 %l.5, 16383
800 %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
801 store i16 %s.5, i16* %gep.5, align 2
803 %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
804 %l.6 = load i16, i16* %gep.6
805 %cmp.6 = icmp uge i16 %l.6, 16383
806 %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
807 store i16 %s.6, i16* %gep.6, align 2
809 %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
810 %l.7 = load i16, i16* %gep.7
811 %cmp.7 = icmp uge i16 %l.7, 16383
812 %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
813 store i16 %s.7, i16* %gep.7, align 2
; Unsigned-max pattern written with a non-strict compare (icmp uge + select
; against the constant 16383) applied to four adjacent i32 elements starting
; at %ptr, each result stored back in place. The CHECK lines expect the
; scalar GEPs to remain and the rest to become a single <4 x i32>
; load/icmp uge/select/store, all with `align 4`.
; NOTE(review): %x is not referenced by any instruction visible here.
817 define void @select_umax_uge_4xi32(i32* %ptr, i32 %x) {
818 ; CHECK-LABEL: @select_umax_uge_4xi32(
820 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
821 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
822 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
823 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
824 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
825 ; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
826 ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
827 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
828 ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
829 ; CHECK-NEXT: ret void
832 %l.0 = load i32, i32* %ptr
833 %cmp.0 = icmp uge i32 %l.0, 16383
834 %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
835 store i32 %s.0, i32* %ptr, align 4
837 %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
838 %l.1 = load i32, i32* %gep.1
839 %cmp.1 = icmp uge i32 %l.1, 16383
840 %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
841 store i32 %s.1, i32* %gep.1, align 4
843 %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
844 %l.2 = load i32, i32* %gep.2
845 %cmp.2 = icmp uge i32 %l.2, 16383
846 %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
847 store i32 %s.2, i32* %gep.2, align 4
849 %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
850 %l.3 = load i32, i32* %gep.3
851 %cmp.3 = icmp uge i32 %l.3, 16383
852 %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
853 store i32 %s.3, i32* %gep.3, align 4
858 ; There is no <2 x i64> version of umax, but we can efficiently lower
859 ; compare/select pairs with uniform predicates.
;
; Unsigned-max pattern written with a non-strict compare (icmp uge + select
; against the constant 16383) applied to two adjacent i64 elements starting
; at %ptr, each result stored back in place. The CHECK lines expect a single
; <2 x i64> load/icmp uge/select/store.
; The scalar loads carry no explicit alignment and the scalar stores use
; `align 4`; the expected vector load is `align 8` and the expected vector
; store is `align 4`.
; NOTE(review): %x is not referenced by any instruction visible here.
860 define void @select_umax_uge_2xi64(i64* %ptr, i64 %x) {
861 ; CHECK-LABEL: @select_umax_uge_2xi64(
863 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
864 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
865 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
866 ; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <2 x i64> [[TMP1]], <i64 16383, i64 16383>
867 ; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
868 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
869 ; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
870 ; CHECK-NEXT: ret void
873 %l.0 = load i64, i64* %ptr
874 %cmp.0 = icmp uge i64 %l.0, 16383
875 %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
876 store i64 %s.0, i64* %ptr, align 4
878 %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
879 %l.1 = load i64, i64* %gep.1
880 %cmp.1 = icmp uge i64 %l.1, 16383
881 %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
882 store i64 %s.1, i64* %gep.1, align 4
; Signed-max pattern (icmp sgt + select against the constant 16383) applied
; to eight adjacent i16 elements starting at %ptr, each result stored back in
; place. The CHECK lines expect the scalar GEPs to remain and the rest to
; become a single <8 x i16> load/icmp sgt/select/store, all with `align 2`.
; NOTE(review): %x is not referenced by any instruction visible here.
887 define void @select_smax_8xi16(i16* %ptr, i16 %x) {
888 ; CHECK-LABEL: @select_smax_8xi16(
890 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
891 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
892 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
893 ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
894 ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
895 ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
896 ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
897 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
898 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
899 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
900 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
901 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
902 ; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
903 ; CHECK-NEXT: ret void
906 %l.0 = load i16, i16* %ptr
907 %cmp.0 = icmp sgt i16 %l.0, 16383
908 %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
909 store i16 %s.0, i16* %ptr, align 2
911 %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
912 %l.1 = load i16, i16* %gep.1
913 %cmp.1 = icmp sgt i16 %l.1, 16383
914 %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
915 store i16 %s.1, i16* %gep.1, align 2
917 %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
918 %l.2 = load i16, i16* %gep.2
919 %cmp.2 = icmp sgt i16 %l.2, 16383
920 %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
921 store i16 %s.2, i16* %gep.2, align 2
923 %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
924 %l.3 = load i16, i16* %gep.3
925 %cmp.3 = icmp sgt i16 %l.3, 16383
926 %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
927 store i16 %s.3, i16* %gep.3, align 2
929 %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
930 %l.4 = load i16, i16* %gep.4
931 %cmp.4 = icmp sgt i16 %l.4, 16383
932 %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
933 store i16 %s.4, i16* %gep.4, align 2
935 %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
936 %l.5 = load i16, i16* %gep.5
937 %cmp.5 = icmp sgt i16 %l.5, 16383
938 %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
939 store i16 %s.5, i16* %gep.5, align 2
941 %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
942 %l.6 = load i16, i16* %gep.6
943 %cmp.6 = icmp sgt i16 %l.6, 16383
944 %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
945 store i16 %s.6, i16* %gep.6, align 2
947 %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
948 %l.7 = load i16, i16* %gep.7
949 %cmp.7 = icmp sgt i16 %l.7, 16383
950 %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
951 store i16 %s.7, i16* %gep.7, align 2
; Signed-max pattern (icmp sgt + select against the constant 16383) applied
; to four adjacent i32 elements starting at %ptr, each result stored back in
; place. The CHECK lines expect the scalar GEPs to remain and the rest to
; become a single <4 x i32> load/icmp sgt/select/store, all with `align 4`.
; NOTE(review): %x is not referenced by any instruction visible here.
955 define void @select_smax_4xi32(i32* %ptr, i32 %x) {
956 ; CHECK-LABEL: @select_smax_4xi32(
958 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
959 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
960 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
961 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
962 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
963 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
964 ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
965 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
966 ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
967 ; CHECK-NEXT: ret void
970 %l.0 = load i32, i32* %ptr
971 %cmp.0 = icmp sgt i32 %l.0, 16383
972 %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
973 store i32 %s.0, i32* %ptr, align 4
975 %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
976 %l.1 = load i32, i32* %gep.1
977 %cmp.1 = icmp sgt i32 %l.1, 16383
978 %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
979 store i32 %s.1, i32* %gep.1, align 4
981 %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
982 %l.2 = load i32, i32* %gep.2
983 %cmp.2 = icmp sgt i32 %l.2, 16383
984 %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
985 store i32 %s.2, i32* %gep.2, align 4
987 %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
988 %l.3 = load i32, i32* %gep.3
989 %cmp.3 = icmp sgt i32 %l.3, 16383
990 %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
991 store i32 %s.3, i32* %gep.3, align 4
996 ; There is no <2 x i64> version of smax, but we can efficiently lower
997 ; compare/select pairs with uniform predicates.
;
; Signed-max pattern (icmp sgt + select against the constant 16383) applied
; to two adjacent i64 elements starting at %ptr, each result stored back in
; place. The CHECK lines expect a single <2 x i64> load/icmp sgt/select/store.
; The scalar loads carry no explicit alignment and the scalar stores use
; `align 4`; the expected vector load is `align 8` and the expected vector
; store is `align 4`.
; NOTE(review): %x is not referenced by any instruction visible here.
998 define void @select_smax_2xi64(i64* %ptr, i64 %x) {
999 ; CHECK-LABEL: @select_smax_2xi64(
1000 ; CHECK-NEXT: entry:
1001 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
1002 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
1003 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
1004 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i64> [[TMP1]], <i64 16383, i64 16383>
1005 ; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
1006 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
1007 ; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
1008 ; CHECK-NEXT: ret void
1011 %l.0 = load i64, i64* %ptr
1012 %cmp.0 = icmp sgt i64 %l.0, 16383
1013 %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
1014 store i64 %s.0, i64* %ptr, align 4
1016 %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
1017 %l.1 = load i64, i64* %gep.1
1018 %cmp.1 = icmp sgt i64 %l.1, 16383
1019 %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
1020 store i64 %s.1, i64* %gep.1, align 4
; Signed-max pattern written with a non-strict compare (icmp sge + select
; against the constant 16383) applied to eight adjacent i16 elements starting
; at %ptr, each result stored back in place. The CHECK lines expect the
; scalar GEPs to remain and the rest to become a single <8 x i16>
; load/icmp sge/select/store, all with `align 2`.
; NOTE(review): %x is not referenced by any instruction visible here.
1026 define void @select_smax_sge_8xi16(i16* %ptr, i16 %x) {
1027 ; CHECK-LABEL: @select_smax_sge_8xi16(
1028 ; CHECK-NEXT: entry:
1029 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
1030 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
1031 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
1032 ; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
1033 ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
1034 ; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
1035 ; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
1036 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
1037 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
1038 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
1039 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
1040 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
1041 ; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
1042 ; CHECK-NEXT: ret void
1045 %l.0 = load i16, i16* %ptr
1046 %cmp.0 = icmp sge i16 %l.0, 16383
1047 %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
1048 store i16 %s.0, i16* %ptr, align 2
1050 %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
1051 %l.1 = load i16, i16* %gep.1
1052 %cmp.1 = icmp sge i16 %l.1, 16383
1053 %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
1054 store i16 %s.1, i16* %gep.1, align 2
1056 %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
1057 %l.2 = load i16, i16* %gep.2
1058 %cmp.2 = icmp sge i16 %l.2, 16383
1059 %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
1060 store i16 %s.2, i16* %gep.2, align 2
1062 %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
1063 %l.3 = load i16, i16* %gep.3
1064 %cmp.3 = icmp sge i16 %l.3, 16383
1065 %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
1066 store i16 %s.3, i16* %gep.3, align 2
1068 %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
1069 %l.4 = load i16, i16* %gep.4
1070 %cmp.4 = icmp sge i16 %l.4, 16383
1071 %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
1072 store i16 %s.4, i16* %gep.4, align 2
1074 %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
1075 %l.5 = load i16, i16* %gep.5
1076 %cmp.5 = icmp sge i16 %l.5, 16383
1077 %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
1078 store i16 %s.5, i16* %gep.5, align 2
1080 %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
1081 %l.6 = load i16, i16* %gep.6
1082 %cmp.6 = icmp sge i16 %l.6, 16383
1083 %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
1084 store i16 %s.6, i16* %gep.6, align 2
1086 %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
1087 %l.7 = load i16, i16* %gep.7
1088 %cmp.7 = icmp sge i16 %l.7, 16383
1089 %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
1090 store i16 %s.7, i16* %gep.7, align 2
; Signed-max pattern written with a non-strict compare (icmp sge + select
; against the constant 16383) applied to four adjacent i32 elements starting
; at %ptr, each result stored back in place. The CHECK lines expect the
; scalar GEPs to remain and the rest to become a single <4 x i32>
; load/icmp sge/select/store, all with `align 4`.
; NOTE(review): %x is not referenced by any instruction visible here.
1094 define void @select_smax_sge_4xi32(i32* %ptr, i32 %x) {
1095 ; CHECK-LABEL: @select_smax_sge_4xi32(
1096 ; CHECK-NEXT: entry:
1097 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
1098 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
1099 ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
1100 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
1101 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
1102 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
1103 ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
1104 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
1105 ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
1106 ; CHECK-NEXT: ret void
1109 %l.0 = load i32, i32* %ptr
1110 %cmp.0 = icmp sge i32 %l.0, 16383
1111 %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
1112 store i32 %s.0, i32* %ptr, align 4
1114 %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
1115 %l.1 = load i32, i32* %gep.1
1116 %cmp.1 = icmp sge i32 %l.1, 16383
1117 %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
1118 store i32 %s.1, i32* %gep.1, align 4
1120 %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
1121 %l.2 = load i32, i32* %gep.2
1122 %cmp.2 = icmp sge i32 %l.2, 16383
1123 %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
1124 store i32 %s.2, i32* %gep.2, align 4
1126 %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
1127 %l.3 = load i32, i32* %gep.3
1128 %cmp.3 = icmp sge i32 %l.3, 16383
1129 %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
1130 store i32 %s.3, i32* %gep.3, align 4
1135 ; There is no <2 x i64> version of smax, but we can efficiently lower
1136 ; compare/select pairs with uniform predicates.
;
; Signed-max pattern written with a non-strict compare (icmp sge + select
; against the constant 16383) applied to two adjacent i64 elements starting
; at %ptr, each result stored back in place. The CHECK lines expect a single
; <2 x i64> load/icmp sge/select/store.
; The scalar loads carry no explicit alignment and the scalar stores use
; `align 4`; the expected vector load is `align 8` and the expected vector
; store is `align 4`.
; NOTE(review): %x is not referenced by any instruction visible here.
1137 define void @select_smax_sge_2xi64(i64* %ptr, i64 %x) {
1138 ; CHECK-LABEL: @select_smax_sge_2xi64(
1139 ; CHECK-NEXT: entry:
1140 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
1141 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
1142 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
1143 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <2 x i64> [[TMP1]], <i64 16383, i64 16383>
1144 ; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
1145 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
1146 ; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
1147 ; CHECK-NEXT: ret void
1150 %l.0 = load i64, i64* %ptr
1151 %cmp.0 = icmp sge i64 %l.0, 16383
1152 %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
1153 store i64 %s.0, i64* %ptr, align 4
1155 %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
1156 %l.1 = load i64, i64* %gep.1
1157 %cmp.1 = icmp sge i64 %l.1, 16383
1158 %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
1159 store i64 %s.1, i64* %gep.1, align 4