1 ; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS
2 ; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -stress-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS
3 ; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -disable-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=DISABLE
5 ; CodeGenPrepare should move the zext into the block with the load
6 ; so that SelectionDAG can select it with the load.
9 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
10 ; OPTALL-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
11 ; OPTALL: store i32 [[ZEXT]], i32* %q
13 define void @foo(i8* %p, i32* %q) {
16 %a = icmp slt i8 %t, 20
17 br i1 %a, label %true, label %false
19 %s = zext i8 %t to i32
26 ; Check that we manage to form a zextload when an operation with only one
27 ; argument to explicitly extend is in the way.
28 ; OPTALL-LABEL: @promoteOneArg
29 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
30 ; OPT-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
31 ; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT]], 2
32 ; Make sure the operation is not promoted when the promotion pass is disabled.
33 ; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], 2
34 ; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
35 ; OPTALL: store i32 [[RES]], i32* %q
37 define void @promoteOneArg(i8* %p, i32* %q) {
40 %add = add nuw i8 %t, 2
41 %a = icmp slt i8 %t, 20
42 br i1 %a, label %true, label %false
44 %s = zext i8 %add to i32
51 ; Check that we manage to form a sextload when an operation with only one
52 ; argument to explicitly extend is in the way.
54 ; OPTALL-LABEL: @promoteOneArgSExt
55 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
56 ; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
57 ; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXT]], 2
58 ; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], 2
59 ; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
60 ; OPTALL: store i32 [[RES]], i32* %q
62 define void @promoteOneArgSExt(i8* %p, i32* %q) {
65 %add = add nsw i8 %t, 2
66 %a = icmp slt i8 %t, 20
67 br i1 %a, label %true, label %false
69 %s = sext i8 %add to i32
76 ; Check that we manage to form a zextload when an operation with two
77 ; arguments to explicitly extend is in the way.
78 ; Extending %add will create two extensions:
81 ; #1 will not be removed as we do not know anything about %b.
82 ; #2 may not be merged with the load because %t is used in a comparison.
83 ; Since two extensions may be emitted in the end instead of one before the
84 ; transformation, the regular heuristic does not apply the optimization.
86 ; OPTALL-LABEL: @promoteTwoArgZext
87 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
89 ; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
90 ; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
91 ; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
93 ; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
94 ; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
96 ; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
97 ; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
99 ; OPTALL: store i32 [[RES]], i32* %q
101 define void @promoteTwoArgZext(i8* %p, i32* %q, i8 %b) {
104 %add = add nuw i8 %t, %b
105 %a = icmp slt i8 %t, 20
106 br i1 %a, label %true, label %false
108 %s = zext i8 %add to i32
109 store i32 %s, i32* %q
115 ; Check that we manage to form a sextload when an operation with two
116 ; arguments to explicitly extend is in the way.
118 ; OPTALL-LABEL: @promoteTwoArgSExt
119 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
121 ; STRESS-NEXT: [[SEXTLD:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
122 ; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i8 %b to i32
123 ; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXTLD]], [[SEXTB]]
125 ; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
126 ; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
128 ; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
129 ; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
130 ; OPTALL: store i32 [[RES]], i32* %q
132 define void @promoteTwoArgSExt(i8* %p, i32* %q, i8 %b) {
135 %add = add nsw i8 %t, %b
136 %a = icmp slt i8 %t, 20
137 br i1 %a, label %true, label %false
139 %s = sext i8 %add to i32
140 store i32 %s, i32* %q
146 ; Check that we do not form a zextload if we need to introduce more than
147 ; one additional extension.
148 ; OPTALL-LABEL: @promoteThreeArgZext
149 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
151 ; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
152 ; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
153 ; STRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
154 ; STRESS-NEXT: [[ZEXTC:%[a-zA-Z_0-9-]+]] = zext i8 %c to i32
155 ; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[TMP]], [[ZEXTC]]
157 ; NONSTRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
158 ; NONSTRESS-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[TMP]], %c
159 ; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
161 ; DISABLE: add nuw i8
162 ; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8
163 ; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
165 ; OPTALL: store i32 [[RES]], i32* %q
167 define void @promoteThreeArgZext(i8* %p, i32* %q, i8 %b, i8 %c) {
170 %tmp = add nuw i8 %t, %b
171 %add = add nuw i8 %tmp, %c
172 %a = icmp slt i8 %t, 20
173 br i1 %a, label %true, label %false
175 %s = zext i8 %add to i32
176 store i32 %s, i32* %q
182 ; Check that we manage to form a zextload after promoting and merging
184 ; OPTALL-LABEL: @promoteMergeExtArgZExt
185 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
187 ; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
188 ; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i16 %b to i32
189 ; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
191 ; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
192 ; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
193 ; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
195 ; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
196 ; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
197 ; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
199 ; OPTALL: store i32 [[RES]], i32* %q
201 define void @promoteMergeExtArgZExt(i8* %p, i32* %q, i16 %b) {
204 %ext = zext i8 %t to i16
205 %add = add nuw i16 %ext, %b
206 %a = icmp slt i8 %t, 20
207 br i1 %a, label %true, label %false
209 %s = zext i16 %add to i32
210 store i32 %s, i32* %q
216 ; Check that we manage to form a sextload after promoting and merging
219 ; OPTALL-LABEL: @promoteMergeExtArgSExt
220 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
222 ; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
223 ; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = sext i16 %b to i32
224 ; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXTLD]], [[ZEXTB]]
226 ; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
227 ; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
228 ; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
230 ; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
231 ; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
232 ; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
233 ; OPTALL: store i32 [[RES]], i32* %q
235 define void @promoteMergeExtArgSExt(i8* %p, i32* %q, i16 %b) {
238 %ext = zext i8 %t to i16
239 %add = add nsw i16 %ext, %b
240 %a = icmp slt i8 %t, 20
241 br i1 %a, label %true, label %false
243 %s = sext i16 %add to i32
244 store i32 %s, i32* %q
250 ; Check that we manage to catch all the extload opportunities that are exposed
251 ; by the different iterations of codegen prepare.
252 ; Moreover, check that we do not promote more than we need to.
253 ; Here is what is happening in this test (not necessarily in this order):
254 ; 1. We try to promote the operand of %sextadd.
255 ; a. This creates one sext of %ld2 and one of %zextld
256 ; b. The sext of %ld2 can be combined with %ld2, so we removed one sext but
257 ; introduced one. This is fine with the current heuristic: neutral.
258 ; => We have one zext of %zextld left and we created one sext of %ld2.
259 ; 2. We try to promote the operand of %sextaddza.
260 ; a. This creates one sext of %zexta and one of %zextld
261 ; b. The sext of %zexta can be combined with the zext of %a.
262 ; c. The sext of %zextld leads to %ld and can be combined with it. This is
263 ; done by promoting %zextld. This is fine with the current heuristic:
265 ; => We have created a new zext of %ld and we created one sext of %zexta.
266 ; 3. We try to promote the operand of %sextaddb.
267 ; a. This creates one sext of %b and one of %zextld
268 ; b. The sext of %b is a dead-end, nothing to be done.
269 ; c. Same thing as 2.c. happens.
270 ; => We have created a new zext of %ld and we created one sext of %b.
271 ; 4. We try to promote the operand of the zext of %zextld introduced in #1.
272 ; a. Same thing as 2.c. happens.
273 ; b. %zextld does not have any other uses. It is removed as dead code.
274 ; => We have created a new zext of %ld and we removed a zext of %zextld and
276 ; Currently we do not try to reuse existing extensions, so in the end we have
277 ; 3 identical zexts of %ld. The extensions will be CSE'ed by SDag.
279 ; OPTALL-LABEL: @severalPromotions
280 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %addr1
281 ; OPT-NEXT: [[ZEXTLD1_1:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
282 ; OPT-NEXT: [[ZEXTLD1_2:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
283 ; OPT-NEXT: [[LD2:%[a-zA-Z_0-9-]+]] = load i32, i32* %addr2
284 ; OPT-NEXT: [[SEXTLD2:%[a-zA-Z_0-9-]+]] = sext i32 [[LD2]] to i64
285 ; OPT-NEXT: [[ZEXTLD1_3:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
286 ; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD2]], [[ZEXTLD1_3]]
287 ; OPT-NEXT: [[ZEXTLD1_4:%[a-zA-Z_0-9-]+]] = zext i8 %a to i64
288 ; OPT-NEXT: [[RESZA:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXTLD1_4]], [[ZEXTLD1_2]]
289 ; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
290 ; OPT-NEXT: [[RESB:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTB]], [[ZEXTLD1_1]]
292 ; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32
293 ; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64
294 ; DISABLE: [[ADDZA:%[a-zA-Z_0-9-]+]] = add nsw i32
295 ; DISABLE: [[RESZA:%[a-zA-Z_0-9-]+]] = sext i32 [[ADDZA]] to i64
296 ; DISABLE: [[ADDB:%[a-zA-Z_0-9-]+]] = add nsw i32
297 ; DISABLE: [[RESB:%[a-zA-Z_0-9-]+]] = sext i32 [[ADDB]] to i64
299 ; OPTALL: call void @dummy(i64 [[RES]], i64 [[RESZA]], i64 [[RESB]])
301 define void @severalPromotions(i8* %addr1, i32* %addr2, i8 %a, i32 %b) {
302 %ld = load i8, i8* %addr1
303 %zextld = zext i8 %ld to i32
304 %ld2 = load i32, i32* %addr2
305 %add = add nsw i32 %ld2, %zextld
306 %sextadd = sext i32 %add to i64
307 %zexta = zext i8 %a to i32
308 %addza = add nsw i32 %zexta, %zextld
309 %sextaddza = sext i32 %addza to i64
310 %addb = add nsw i32 %b, %zextld
311 %sextaddb = sext i32 %addb to i64
312 call void @dummy(i64 %sextadd, i64 %sextaddza, i64 %sextaddb)
316 declare void @dummy(i64, i64, i64)
318 ; Make sure we do not try to promote vector types since the type promotion
319 ; helper does not support them for now.
320 ; OPTALL-LABEL: @vectorPromotion
321 ; OPTALL: [[SHL:%[a-zA-Z_0-9-]+]] = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
322 ; OPTALL: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext <2 x i32> [[SHL]] to <2 x i64>
324 define void @vectorPromotion() {
326 %a = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
327 %b = zext <2 x i32> %a to <2 x i64>
331 @a = common global i32 0, align 4
332 @c = common global [2 x i32] zeroinitializer, align 4
334 ; Make sure we support promotion of operands that produce a Value as opposed
336 ; This used to cause a crash.
337 ; OPTALL-LABEL: @promotionOfArgEndsUpInValue
338 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i16, i16* %addr
340 ; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i16 [[LD]] to i32
341 ; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw nsw i32 [[SEXT]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i32)
343 ; DISABLE-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw nsw i16 [[LD]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
344 ; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
346 ; OPTALL-NEXT: ret i32 [[RES]]
347 define i32 @promotionOfArgEndsUpInValue(i16* %addr) {
349 %val = load i16, i16* %addr
350 %add = add nuw nsw i16 %val, zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
351 %conv3 = sext i16 %add to i32
355 ; Check that we see that one zext can be derived from the other for free.
356 ; OPTALL-LABEL: @promoteTwoArgZextWithSourceExtendedTwice
357 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
359 ; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
360 ; OPT-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
361 ; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
362 ; OPT-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], 12
363 ; OPT-NEXT: store i32 [[RES32]], i32* %addr
364 ; OPT-NEXT: store i64 [[RES64]], i64* %q
366 ; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
367 ; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
368 ; DISABLE-NEXT: [[RES2_32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], 12
369 ; DISABLE-NEXT: store i32 [[RES32]], i32* %addr
370 ; DISABLE-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES2_32]] to i64
371 ; DISABLE-NEXT: store i64 [[ZEXT64]], i64* %q
373 ; OPTALL-NEXT: ret void
374 define void @promoteTwoArgZextWithSourceExtendedTwice(i8* %p, i64* %q, i32 %b, i32* %addr) {
377 %zextt = zext i8 %t to i32
378 %add = add nuw i32 %zextt, %b
379 %add2 = add nuw i32 %zextt, 12
380 store i32 %add, i32 *%addr
381 %s = zext i32 %add2 to i64
382 store i64 %s, i64* %q
386 ; Check that we do not increase the cost of the code.
387 ; The input has one free zext and one free sext. If we promoted all the
388 ; way through the load, we would end up with a free zext and a non-free
389 ; sext (of %b).
390 ; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode
391 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
393 ; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
394 ; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
395 ; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
396 ; STRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = trunc i64 [[IDX64]] to i32
398 ; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
399 ; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
400 ; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
402 ; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
403 ; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
404 ; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
406 ; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %addr, i64 [[IDX64]]
407 ; OPTALL-NEXT: store i32 [[RES32]], i32* [[GEP]]
408 ; OPTALL-NEXT: ret void
409 define void @doNotPromoteFreeSExtFromAddrMode(i8* %p, i32 %b, i32* %addr) {
412 %zextt = zext i8 %t to i32
413 %add = add nsw i32 %zextt, %b
414 %idx64 = sext i32 %add to i64
415 %staddr = getelementptr inbounds i32, i32* %addr, i64 %idx64
416 store i32 %add, i32 *%staddr
420 ; Check that we do not increase the cost of the code.
421 ; The input has one free zext and one free sext. If we promoted all the
422 ; way through the load, we would end up with a free zext and a non-free
423 ; sext (of %b).
424 ; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode64
425 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
427 ; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
428 ; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
429 ; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
431 ; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
432 ; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
433 ; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
435 ; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
436 ; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
437 ; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
439 ; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i64, i64* %addr, i64 [[IDX64]]
440 ; OPTALL-NEXT: store i64 %stuff, i64* [[GEP]]
441 ; OPTALL-NEXT: ret void
442 define void @doNotPromoteFreeSExtFromAddrMode64(i8* %p, i32 %b, i64* %addr, i64 %stuff) {
445 %zextt = zext i8 %t to i32
446 %add = add nsw i32 %zextt, %b
447 %idx64 = sext i32 %add to i64
448 %staddr = getelementptr inbounds i64, i64* %addr, i64 %idx64
449 store i64 %stuff, i64 *%staddr
453 ; Check that we do not increase the cost of the code.
454 ; The input has one free zext and one free sext. If we promoted all the
455 ; way through the load, we would end up with a free zext and a non-free
456 ; sext (of %b).
457 ; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode128
458 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
460 ; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
461 ; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
462 ; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
464 ; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
465 ; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
466 ; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
468 ; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
469 ; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
470 ; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
472 ; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i128, i128* %addr, i64 [[IDX64]]
473 ; OPTALL-NEXT: store i128 %stuff, i128* [[GEP]]
474 ; OPTALL-NEXT: ret void
475 define void @doNotPromoteFreeSExtFromAddrMode128(i8* %p, i32 %b, i128* %addr, i128 %stuff) {
478 %zextt = zext i8 %t to i32
479 %add = add nsw i32 %zextt, %b
480 %idx64 = sext i32 %add to i64
481 %staddr = getelementptr inbounds i128, i128* %addr, i64 %idx64
482 store i128 %stuff, i128 *%staddr
487 ; Check that we do not increase the cost of the code.
488 ; The input has one free zext and one free sext. If we promoted all the
489 ; way through the load, we would end up with a free zext and a non-free
490 ; sext (of %b).
491 ; OPTALL-LABEL: @promoteSExtFromAddrMode256
492 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
494 ; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
495 ; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
496 ; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
498 ; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
499 ; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
500 ; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
502 ; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i256, i256* %addr, i64 [[IDX64]]
503 ; OPTALL-NEXT: store i256 %stuff, i256* [[GEP]]
504 ; OPTALL-NEXT: ret void
505 define void @promoteSExtFromAddrMode256(i8* %p, i32 %b, i256* %addr, i256 %stuff) {
508 %zextt = zext i8 %t to i32
509 %add = add nsw i32 %zextt, %b
510 %idx64 = sext i32 %add to i64
511 %staddr = getelementptr inbounds i256, i256* %addr, i64 %idx64
512 store i256 %stuff, i256 *%staddr
516 ; Check that we do not increase the cost of the code.
517 ; The input has two free zexts.
518 ; When we promote all the way through the load, we end up with
519 ; a free zext and a non-free zext (of %b).
520 ; However, the current target lowering says zext i32 to i64 is free
521 ; so the promotion happens because the cost did not change and may
522 ; expose more opportunities.
523 ; This would need to be fixed at some point.
524 ; OPTALL-LABEL: @doNotPromoteFreeZExtFromAddrMode
525 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
527 ; This transformation should really happen only for stress mode.
528 ; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
529 ; OPT-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
530 ; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
531 ; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = trunc i64 [[IDX64]] to i32
533 ; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
534 ; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
535 ; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES32]] to i64
537 ; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %addr, i64 [[IDX64]]
538 ; OPTALL-NEXT: store i32 [[RES32]], i32* [[GEP]]
539 ; OPTALL-NEXT: ret void
540 define void @doNotPromoteFreeZExtFromAddrMode(i8* %p, i32 %b, i32* %addr) {
543 %zextt = zext i8 %t to i32
544 %add = add nuw i32 %zextt, %b
545 %idx64 = zext i32 %add to i64
546 %staddr = getelementptr inbounds i32, i32* %addr, i64 %idx64
547 store i32 %add, i32 *%staddr
551 ; OPTALL-LABEL: @doNotPromoteFreeSExtFromShift
552 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
554 ; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
555 ; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
556 ; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
558 ; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
559 ; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
560 ; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
562 ; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
563 ; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
564 ; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
566 ; OPTALL-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = shl i64 [[IDX64]], 12
567 ; OPTALL-NEXT: ret i64 %staddr
568 define i64 @doNotPromoteFreeSExtFromShift(i8* %p, i32 %b) {
571 %zextt = zext i8 %t to i32
572 %add = add nsw i32 %zextt, %b
573 %idx64 = sext i32 %add to i64
574 %staddr = shl i64 %idx64, 12
578 ; Same comment as doNotPromoteFreeZExtFromAddrMode.
579 ; OPTALL-LABEL: @doNotPromoteFreeZExtFromShift
580 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
582 ; This transformation should really happen only for stress mode.
583 ; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
584 ; OPT-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
585 ; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
587 ; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
588 ; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
589 ; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES32]] to i64
591 ; OPTALL-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = shl i64 [[IDX64]], 12
592 ; OPTALL-NEXT: ret i64 %staddr
593 define i64 @doNotPromoteFreeZExtFromShift(i8* %p, i32 %b) {
596 %zextt = zext i8 %t to i32
597 %add = add nuw i32 %zextt, %b
598 %idx64 = zext i32 %add to i64
599 %staddr = shl i64 %idx64, 12
603 ; The input has one free zext and one non-free sext.
604 ; When we promote all the way through to the load, we end up with
605 ; a free zext, a free sext (%ld1), and a non-free sext (of %cst).
606 ; However, when we generate a load pair, the free sext(%ld1) becomes
607 ; non-free. So technically, we trade a non-free sext for two non-free
609 ; This would need to be fixed at some point.
610 ; OPTALL-LABEL: @doNotPromoteBecauseOfPairedLoad
611 ; OPTALL: [[LD0:%[a-zA-Z_0-9-]+]] = load i32, i32* %p
612 ; OPTALL: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %p, i64 1
613 ; OPTALL: [[LD1:%[a-zA-Z_0-9-]+]] = load i32, i32* [[GEP]]
615 ; This transformation should really happen only for stress mode.
616 ; OPT-NEXT: [[SEXTLD1:%[a-zA-Z_0-9-]+]] = sext i32 [[LD1]] to i64
617 ; OPT-NEXT: [[SEXTCST:%[a-zA-Z_0-9-]+]] = sext i32 %cst to i64
618 ; OPT-NEXT: [[SEXTRES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD1]], [[SEXTCST]]
620 ; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[LD1]], %cst
621 ; DISABLE-NEXT: [[SEXTRES:%[a-zA-Z_0-9-]+]] = sext i32 [[RES]] to i64
623 ; OPTALL-NEXT: [[ZEXTLD0:%[a-zA-Z_0-9-]+]] = zext i32 [[LD0]] to i64
624 ; OPTALL-NEXT: [[FINAL:%[a-zA-Z_0-9-]+]] = add i64 [[SEXTRES]], [[ZEXTLD0]]
625 ; OPTALL-NEXT: ret i64 [[FINAL]]
626 define i64 @doNotPromoteBecauseOfPairedLoad(i32* %p, i32 %cst) {
627 %ld0 = load i32, i32* %p
628 %idxLd1 = getelementptr inbounds i32, i32* %p, i64 1
629 %ld1 = load i32, i32* %idxLd1
630 %res = add nsw i32 %ld1, %cst
631 %sextres = sext i32 %res to i64
632 %zextLd0 = zext i32 %ld0 to i64
633 %final = add i64 %sextres, %zextLd0
637 define i64 @promoteZextShl(i1 %c, i16* %P) {
639 ; OPTALL-LABEL: promoteZextShl
641 ; OPT: %[[LD:.*]] = load i16, i16* %P
642 ; OPT: %[[EXT:.*]] = zext i16 %[[LD]] to i64
644 ; OPT: shl nsw i64 %[[EXT]], 1
646 ; DISABLE: %r = sext i32 %shl2 to i64
647 %ld = load i16, i16* %P
648 br i1 %c, label %end, label %if.then
650 %z = zext i16 %ld to i32
651 %shl2 = shl nsw i32 %z, 1
652 %r = sext i32 %shl2 to i64