1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2 ; RUN: opt < %s -passes=aggressive-instcombine -S | FileCheck %s
4 ; The LIT tests rely on i32, i16 and i8 being valid machine types.
5 ; The bounds checking tests require also i64 and i128.
6 target datalayout = "n8:16:32:64:128"
8 ; This LIT test checks if TruncInstCombine pass correctly recognizes the
9 ; constraints from a signed min-max clamp. The clamp is a sequence of smin and
10 ; smax instructions limiting a variable into a range, smin <= x <= smax.
12 ; Each LIT test (except the last ones) has two versions depending on the order
13 ; of the smin and smax calls:
14 ; a) y = smax(smin(x, upper_limit), lower_limit)
15 ; b) y = smin(smax(x, lower_limit), upper_limit)
17 ; The clamp is used in TruncInstCombine.cpp pass (as part of aggressive-instcombine)
18 ; to optimize extensions and truncations of lshr. This is what is tested here.
19 ; The pass also optimizes extensions and truncations of other binary operators,
20 ; but in such cases the smin-smax clamp may not be used.
22 define i8 @test_0a(i16 %x) {
23 ; CHECK-LABEL: define i8 @test_0a(
24 ; CHECK-SAME: i16 [[X:%.*]]) {
25 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 31)
26 ; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 0)
27 ; CHECK-NEXT: [[A:%.*]] = trunc i16 [[TMP2]] to i8
28 ; CHECK-NEXT: [[B:%.*]] = lshr i8 [[A]], 2
29 ; CHECK-NEXT: ret i8 [[B]]
30 ;
31 %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 31)
32 %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 0)
33 %a = sext i16 %2 to i32
34 %b = lshr i32 %a, 2
35 %b.trunc = trunc i32 %b to i8
36 ret i8 %b.trunc
37 }
39 define i8 @test_0b(i16 %x) {
40 ; CHECK-LABEL: define i8 @test_0b(
41 ; CHECK-SAME: i16 [[X:%.*]]) {
42 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smax.i16(i16 [[X]], i16 0)
43 ; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP1]], i16 31)
44 ; CHECK-NEXT: [[A:%.*]] = trunc i16 [[TMP2]] to i8
45 ; CHECK-NEXT: [[B:%.*]] = lshr i8 [[A]], 2
46 ; CHECK-NEXT: ret i8 [[B]]
47 ;
48 %1 = tail call i16 @llvm.smax.i16(i16 %x, i16 0)
49 %2 = tail call i16 @llvm.smin.i16(i16 %1, i16 31)
50 %a = sext i16 %2 to i32
51 %b = lshr i32 %a, 2
52 %b.trunc = trunc i32 %b to i8
53 ret i8 %b.trunc
54 }
56 ; The following two tests contain add instead of lshr.
57 ; The optimization works here as well.
58 define i8 @test_1a(i16 %x) {
59 ; CHECK-LABEL: define i8 @test_1a(
60 ; CHECK-SAME: i16 [[X:%.*]]) {
61 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 31)
62 ; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 0)
63 ; CHECK-NEXT: [[A:%.*]] = trunc i16 [[TMP2]] to i8
64 ; CHECK-NEXT: [[B:%.*]] = add i8 [[A]], 2
65 ; CHECK-NEXT: ret i8 [[B]]
66 ;
67 %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 31)
68 %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 0)
69 %a = sext i16 %2 to i32
70 %b = add i32 %a, 2
71 %b.trunc = trunc i32 %b to i8
72 ret i8 %b.trunc
73 }
75 define i8 @test_1b(i16 %x) {
76 ; CHECK-LABEL: define i8 @test_1b(
77 ; CHECK-SAME: i16 [[X:%.*]]) {
78 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smax.i16(i16 [[X]], i16 0)
79 ; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP1]], i16 31)
80 ; CHECK-NEXT: [[A:%.*]] = trunc i16 [[TMP2]] to i8
81 ; CHECK-NEXT: [[B:%.*]] = add i8 [[A]], 2
82 ; CHECK-NEXT: ret i8 [[B]]
83 ;
84 %1 = tail call i16 @llvm.smax.i16(i16 %x, i16 0)
85 %2 = tail call i16 @llvm.smin.i16(i16 %1, i16 31)
86 %a = sext i16 %2 to i32
87 %b = add i32 %a, 2
88 %b.trunc = trunc i32 %b to i8
89 ret i8 %b.trunc
90 }
92 ; Tests for clamping with negative min and max.
94 ; With sext no optimization occurs.
95 define i8 @test_2a(i16 %x) {
96 ; CHECK-LABEL: define i8 @test_2a(
97 ; CHECK-SAME: i16 [[X:%.*]]) {
98 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 -1)
99 ; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 -31)
100 ; CHECK-NEXT: [[A:%.*]] = sext i16 [[TMP2]] to i32
101 ; CHECK-NEXT: [[B:%.*]] = lshr i32 [[A]], 2
102 ; CHECK-NEXT: [[B_TRUNC:%.*]] = trunc i32 [[B]] to i8
103 ; CHECK-NEXT: ret i8 [[B_TRUNC]]
104 ;
105 %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 -1)
106 %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 -31)
107 %a = sext i16 %2 to i32
108 %b = lshr i32 %a, 2
109 %b.trunc = trunc i32 %b to i8
110 ret i8 %b.trunc
111 }
113 define i8 @test_2b(i16 %x) {
114 ; CHECK-LABEL: define i8 @test_2b(
115 ; CHECK-SAME: i16 [[X:%.*]]) {
116 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smax.i16(i16 [[X]], i16 -31)
117 ; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP1]], i16 -1)
118 ; CHECK-NEXT: [[A:%.*]] = sext i16 [[TMP2]] to i32
119 ; CHECK-NEXT: [[B:%.*]] = lshr i32 [[A]], 2
120 ; CHECK-NEXT: [[B_TRUNC:%.*]] = trunc i32 [[B]] to i8
121 ; CHECK-NEXT: ret i8 [[B_TRUNC]]
122 ;
123 %1 = tail call i16 @llvm.smax.i16(i16 %x, i16 -31)
124 %2 = tail call i16 @llvm.smin.i16(i16 %1, i16 -1)
125 %a = sext i16 %2 to i32
126 %b = lshr i32 %a, 2
127 %b.trunc = trunc i32 %b to i8
128 ret i8 %b.trunc
129 }
131 ; With zext the optimization occurs.
132 define i8 @test_2c(i16 %x) {
133 ; CHECK-LABEL: define i8 @test_2c(
134 ; CHECK-SAME: i16 [[X:%.*]]) {
135 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 -1)
136 ; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 -31)
137 ; CHECK-NEXT: [[B:%.*]] = lshr i16 [[TMP2]], 2
138 ; CHECK-NEXT: [[B_TRUNC:%.*]] = trunc i16 [[B]] to i8
139 ; CHECK-NEXT: ret i8 [[B_TRUNC]]
140 ;
141 %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 -1)
142 %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 -31)
143 %a = zext i16 %2 to i32
144 %b = lshr i32 %a, 2
145 %b.trunc = trunc i32 %b to i8
146 ret i8 %b.trunc
147 }
149 define i8 @test_2d(i16 %x) {
150 ; CHECK-LABEL: define i8 @test_2d(
151 ; CHECK-SAME: i16 [[X:%.*]]) {
152 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smax.i16(i16 [[X]], i16 -31)
153 ; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP1]], i16 -1)
154 ; CHECK-NEXT: [[B:%.*]] = lshr i16 [[TMP2]], 2
155 ; CHECK-NEXT: [[B_TRUNC:%.*]] = trunc i16 [[B]] to i8
156 ; CHECK-NEXT: ret i8 [[B_TRUNC]]
157 ;
158 %1 = tail call i16 @llvm.smax.i16(i16 %x, i16 -31)
159 %2 = tail call i16 @llvm.smin.i16(i16 %1, i16 -1)
160 %a = zext i16 %2 to i32
161 %b = lshr i32 %a, 2
162 %b.trunc = trunc i32 %b to i8
163 ret i8 %b.trunc
164 }
166 ; Tests for clamping with mixed-signed min and max.
167 ; With zext the optimization occurs.
168 define i8 @test_3a(i16 %x) {
169 ; CHECK-LABEL: define i8 @test_3a(
170 ; CHECK-SAME: i16 [[X:%.*]]) {
171 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 31)
172 ; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 -31)
173 ; CHECK-NEXT: [[B:%.*]] = lshr i16 [[TMP2]], 2
174 ; CHECK-NEXT: [[B_TRUNC:%.*]] = trunc i16 [[B]] to i8
175 ; CHECK-NEXT: ret i8 [[B_TRUNC]]
176 ;
177 %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 31)
178 %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 -31)
179 %a = zext i16 %2 to i32
180 %b = lshr i32 %a, 2
181 %b.trunc = trunc i32 %b to i8
182 ret i8 %b.trunc
183 }
185 define i8 @test_3b(i16 %x) {
186 ; CHECK-LABEL: define i8 @test_3b(
187 ; CHECK-SAME: i16 [[X:%.*]]) {
188 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smax.i16(i16 [[X]], i16 -31)
189 ; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP1]], i16 31)
190 ; CHECK-NEXT: [[B:%.*]] = lshr i16 [[TMP2]], 2
191 ; CHECK-NEXT: [[B_TRUNC:%.*]] = trunc i16 [[B]] to i8
192 ; CHECK-NEXT: ret i8 [[B_TRUNC]]
193 ;
194 %1 = tail call i16 @llvm.smax.i16(i16 %x, i16 -31)
195 %2 = tail call i16 @llvm.smin.i16(i16 %1, i16 31)
196 %a = zext i16 %2 to i32
197 %b = lshr i32 %a, 2
198 %b.trunc = trunc i32 %b to i8
199 ret i8 %b.trunc
200 }
202 ; Optimizations with vector types.
203 define <16 x i8> @test_vec_1a(<16 x i16> %x) {
204 ; CHECK-LABEL: define <16 x i8> @test_vec_1a(
205 ; CHECK-SAME: <16 x i16> [[X:%.*]]) {
206 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> [[X]], <16 x i16> splat (i16 127))
207 ; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> [[TMP1]], <16 x i16> zeroinitializer)
208 ; CHECK-NEXT: [[A:%.*]] = trunc <16 x i16> [[TMP2]] to <16 x i8>
209 ; CHECK-NEXT: [[B:%.*]] = lshr <16 x i8> [[A]], splat (i8 2)
210 ; CHECK-NEXT: ret <16 x i8> [[B]]
211 ;
212 %1 = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> %x, <16 x i16> splat (i16 127))
213 %2 = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> %1, <16 x i16> zeroinitializer)
214 %a = sext <16 x i16> %2 to <16 x i32>
215 %b = lshr <16 x i32> %a, splat (i32 2)
216 %b.trunc = trunc <16 x i32> %b to <16 x i8>
217 ret <16 x i8> %b.trunc
218 }
220 define <16 x i8> @test_vec_1b(<16 x i16> %x) {
221 ; CHECK-LABEL: define <16 x i8> @test_vec_1b(
222 ; CHECK-SAME: <16 x i16> [[X:%.*]]) {
223 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> [[X]], <16 x i16> zeroinitializer)
224 ; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> [[TMP1]], <16 x i16> splat (i16 127))
225 ; CHECK-NEXT: [[A:%.*]] = trunc <16 x i16> [[TMP2]] to <16 x i8>
226 ; CHECK-NEXT: [[B:%.*]] = lshr <16 x i8> [[A]], splat (i8 2)
227 ; CHECK-NEXT: ret <16 x i8> [[B]]
228 ;
229 %1 = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> %x, <16 x i16> zeroinitializer)
230 %2 = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> %1, <16 x i16> splat (i16 127))
231 %a = sext <16 x i16> %2 to <16 x i32>
232 %b = lshr <16 x i32> %a, splat (i32 2)
233 %b.trunc = trunc <16 x i32> %b to <16 x i8>
234 ret <16 x i8> %b.trunc
235 }
237 ; A longer test that was the original motivation for the smin-smax clamping.
238 define i8 @test_final(i16 %x, i16 %y) {
239 ; CHECK-LABEL: define i8 @test_final(
240 ; CHECK-SAME: i16 [[X:%.*]], i16 [[Y:%.*]]) {
241 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 127)
242 ; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 0)
243 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i16 @llvm.smax.i16(i16 [[Y]], i16 0)
244 ; CHECK-NEXT: [[TMP4:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP3]], i16 127)
245 ; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[TMP2]], [[TMP4]]
246 ; CHECK-NEXT: [[SHR:%.*]] = lshr i16 [[MUL]], 7
247 ; CHECK-NEXT: [[TRUNC:%.*]] = trunc i16 [[SHR]] to i8
248 ; CHECK-NEXT: ret i8 [[TRUNC]]
249 ;
250 %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 127)
251 %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 0)
252 %x.clamp = zext nneg i16 %2 to i32
253 %3 = tail call i16 @llvm.smax.i16(i16 %y, i16 0)
254 %4 = tail call i16 @llvm.smin.i16(i16 %3, i16 127)
255 %y.clamp = zext nneg i16 %4 to i32
256 %mul = mul nuw nsw i32 %x.clamp, %y.clamp
257 %shr = lshr i32 %mul, 7
258 %trunc= trunc nuw nsw i32 %shr to i8
259 ret i8 %trunc
260 }
262 ; Range tests below check if the bounds are dealt with correctly.
264 ; This gets optimized.
265 define i8 @test_bounds_1(i16 %x) {
266 ; CHECK-LABEL: define i8 @test_bounds_1(
267 ; CHECK-SAME: i16 [[X:%.*]]) {
268 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 127)
269 ; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 0)
270 ; CHECK-NEXT: [[A:%.*]] = trunc i16 [[TMP2]] to i8
271 ; CHECK-NEXT: [[B:%.*]] = lshr i8 [[A]], 7
272 ; CHECK-NEXT: ret i8 [[B]]
273 ;
274 %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 127)
275 %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 0)
276 %a = sext i16 %2 to i32
277 %b = lshr i32 %a, 7
278 %b.trunc = trunc i32 %b to i8
279 ret i8 %b.trunc
280 }
282 ; While this does not.
283 define i8 @test_bounds_2(i16 %x) {
284 ; CHECK-LABEL: define i8 @test_bounds_2(
285 ; CHECK-SAME: i16 [[X:%.*]]) {
286 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 128)
287 ; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 0)
288 ; CHECK-NEXT: [[A:%.*]] = trunc i16 [[TMP2]] to i8
289 ; CHECK-NEXT: [[B:%.*]] = lshr i8 [[A]], 7
290 ; CHECK-NEXT: ret i8 [[B]]
291 ;
292 %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 128)
293 %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 0)
294 %a = sext i16 %2 to i32
295 %b = lshr i32 %a, 7
296 %b.trunc = trunc i32 %b to i8
297 ret i8 %b.trunc
298 }
300 ; This should get optimized. We test here if the optimization works correctly
301 ; if the upper limit is signed max int.
302 define i8 @test_bounds_3(i16 %x) {
303 ; CHECK-LABEL: define i8 @test_bounds_3(
304 ; CHECK-SAME: i16 [[X:%.*]]) {
305 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 32767)
306 ; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 32752)
307 ; CHECK-NEXT: [[B:%.*]] = lshr i16 [[TMP2]], 2
308 ; CHECK-NEXT: [[B_TRUNC:%.*]] = trunc i16 [[B]] to i8
309 ; CHECK-NEXT: ret i8 [[B_TRUNC]]
310 ;
311 %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 32767)
312 %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 32752)
313 %a = sext i16 %2 to i32
314 %b = lshr i32 %a, 2
315 %b.trunc = trunc i32 %b to i8
316 ret i8 %b.trunc
317 }
319 ; Here min = 128 is greater than max = 0.
320 define i8 @test_bounds_4(i16 %x) {
321 ; CHECK-LABEL: define i8 @test_bounds_4(
322 ; CHECK-SAME: i16 [[X:%.*]]) {
323 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 0)
324 ; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 128)
325 ; CHECK-NEXT: [[B:%.*]] = lshr i16 [[TMP2]], 2
326 ; CHECK-NEXT: [[B_TRUNC:%.*]] = trunc i16 [[B]] to i8
327 ; CHECK-NEXT: ret i8 [[B_TRUNC]]
328 ;
329 %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 0)
330 %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 128)
331 %a = sext i16 %2 to i32
332 %b = lshr i32 %a, 2
333 %b.trunc = trunc i32 %b to i8
334 ret i8 %b.trunc
335 }
337 ; The following 3 tests check the situation where min and max are minimal and
338 ; maximal signed values. No transformations should occur here.
339 define i8 @test_bounds_5(i16 %x) {
340 ; CHECK-LABEL: define i8 @test_bounds_5(
341 ; CHECK-SAME: i16 [[X:%.*]]) {
342 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 32767)
343 ; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 -32768)
344 ; CHECK-NEXT: [[B:%.*]] = lshr i16 [[TMP2]], 2
345 ; CHECK-NEXT: [[B_TRUNC:%.*]] = trunc i16 [[B]] to i8
346 ; CHECK-NEXT: ret i8 [[B_TRUNC]]
347 ;
348 %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 32767)
349 %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 -32768)
350 %a = zext i16 %2 to i32
351 %b = lshr i32 %a, 2
352 %b.trunc = trunc i32 %b to i8
353 ret i8 %b.trunc
354 }
356 define i8 @test_bounds_6(i32 %x) {
357 ; CHECK-LABEL: define i8 @test_bounds_6(
358 ; CHECK-SAME: i32 [[X:%.*]]) {
359 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.smin.i32(i32 [[X]], i32 2147483647)
360 ; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.smax.i32(i32 [[TMP1]], i32 -2147483648)
361 ; CHECK-NEXT: [[B:%.*]] = lshr i32 [[TMP2]], 2
362 ; CHECK-NEXT: [[B_TRUNC:%.*]] = trunc i32 [[B]] to i8
363 ; CHECK-NEXT: ret i8 [[B_TRUNC]]
364 ;
365 %1 = tail call i32 @llvm.smin.i32(i32 %x, i32 2147483647)
366 %2 = tail call i32 @llvm.smax.i32(i32 %1, i32 -2147483648)
367 %a = zext i32 %2 to i64
368 %b = lshr i64 %a, 2
369 %b.trunc = trunc i64 %b to i8
370 ret i8 %b.trunc
371 }
373 define i8 @test_bounds_7(i64 %x) {
374 ; CHECK-LABEL: define i8 @test_bounds_7(
375 ; CHECK-SAME: i64 [[X:%.*]]) {
376 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.smin.i64(i64 [[X]], i64 9223372036854775807)
377 ; CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP1]], i64 -9223372036854775808)
378 ; CHECK-NEXT: [[B:%.*]] = lshr i64 [[TMP2]], 2
379 ; CHECK-NEXT: [[B_TRUNC:%.*]] = trunc i64 [[B]] to i8
380 ; CHECK-NEXT: ret i8 [[B_TRUNC]]
381 ;
382 %1 = tail call i64 @llvm.smin.i64(i64 %x, i64 9223372036854775807)
383 %2 = tail call i64 @llvm.smax.i64(i64 %1, i64 -9223372036854775808)
384 %a = zext i64 %2 to i128
385 %b = lshr i128 %a, 2
386 %b.trunc = trunc i128 %b to i8
387 ret i8 %b.trunc
388 }