1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=aarch64 -sink-insts-to-avoid-spills | FileCheck %s
; Module data layout: little-endian ("e"), ELF-style name mangling ("m:e"),
; native integer widths of 32 and 64 bits ("n32:64"), 128-bit stack alignment
; ("S128").
4 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
; @A is an externally defined, dso_local array of 100 i32 values. Every test
; function in this file loads its first element before entering a loop.
6 @A = external dso_local global [100 x i32], align 4
; sink_load_and_copy: the first i32 of @A is loaded ahead of the loop, and the
; loop passes that value to @_Z3usei on every iteration, dividing a running
; value (initially %n) by each call result.
; The autogenerated assertions below expect the load (ldr w21) to be emitted in
; the %for.body.preheader block and the value to be copied from w21 into w0
; before each call inside the loop.
; NOTE(review): the basic-block labels, the return and the closing brace are
; not present in this view of the function - confirm against the full file.
8 define i32 @sink_load_and_copy(i32 %n) {
9 ; CHECK-LABEL: sink_load_and_copy:
10 ; CHECK: // %bb.0: // %entry
11 ; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
12 ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
13 ; CHECK-NEXT: .cfi_def_cfa_offset 32
14 ; CHECK-NEXT: .cfi_offset w19, -8
15 ; CHECK-NEXT: .cfi_offset w20, -16
16 ; CHECK-NEXT: .cfi_offset w21, -24
17 ; CHECK-NEXT: .cfi_offset w30, -32
18 ; CHECK-NEXT: mov w19, w0
19 ; CHECK-NEXT: cmp w0, #1
20 ; CHECK-NEXT: b.lt .LBB0_3
21 ; CHECK-NEXT: // %bb.1: // %for.body.preheader
22 ; CHECK-NEXT: adrp x8, A
23 ; CHECK-NEXT: mov w20, w19
24 ; CHECK-NEXT: ldr w21, [x8, :lo12:A]
25 ; CHECK-NEXT: .LBB0_2: // %for.body
26 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
27 ; CHECK-NEXT: mov w0, w21
28 ; CHECK-NEXT: bl _Z3usei
29 ; CHECK-NEXT: sdiv w20, w20, w0
30 ; CHECK-NEXT: subs w19, w19, #1
31 ; CHECK-NEXT: b.ne .LBB0_2
32 ; CHECK-NEXT: b .LBB0_4
33 ; CHECK-NEXT: .LBB0_3:
34 ; CHECK-NEXT: mov w20, w19
35 ; CHECK-NEXT: .LBB0_4: // %for.cond.cleanup
36 ; CHECK-NEXT: mov w0, w20
37 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
38 ; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; Enter the loop only when %n > 0; otherwise branch straight to
; for.cond.cleanup.
41 %cmp63 = icmp sgt i32 %n, 0
42 br i1 %cmp63, label %for.body.preheader, label %for.cond.cleanup
; Load of the first element of @A; %0 is the argument of every call in the loop.
45 %0 = load i32, ptr @A, align 4
; Merged running value: %n when the loop is skipped, %div when coming from the
; loop.
49 %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ]
; %lsr.iv counts down from %n; %sum.065 carries the running quotient, which
; starts at %n.
53 %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
54 %sum.065 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ]
55 %call = tail call i32 @_Z3usei(i32 %0)
56 %div = sdiv i32 %sum.065, %call
; Decrement the counter and leave the loop once it reaches zero.
57 %lsr.iv.next = add i32 %lsr.iv, -1
58 %exitcond.not = icmp eq i32 %lsr.iv.next, 0
59 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
; cant_sink_successive_call: same loop shape as the other tests in this file,
; but the load of @A is directly followed by a call to @_Z3usei(%n) before the
; loop starts. Judging by the function name, that intervening call is what is
; meant to keep the load from being moved into the loop.
; The autogenerated assertions below expect the load (ldr w20) ahead of the
; first "bl _Z3usei" in the %for.body.preheader block, with the loop reading
; the value from w20.
; NOTE(review): the basic-block labels, the return and the closing brace are
; not present in this view of the function - confirm against the full file.
62 define i32 @cant_sink_successive_call(i32 %n) {
63 ; CHECK-LABEL: cant_sink_successive_call:
64 ; CHECK: // %bb.0: // %entry
65 ; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
66 ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
67 ; CHECK-NEXT: .cfi_def_cfa_offset 32
68 ; CHECK-NEXT: .cfi_offset w19, -8
69 ; CHECK-NEXT: .cfi_offset w20, -16
70 ; CHECK-NEXT: .cfi_offset w21, -24
71 ; CHECK-NEXT: .cfi_offset w30, -32
72 ; CHECK-NEXT: mov w19, w0
73 ; CHECK-NEXT: cmp w0, #1
74 ; CHECK-NEXT: b.lt .LBB1_3
75 ; CHECK-NEXT: // %bb.1: // %for.body.preheader
76 ; CHECK-NEXT: adrp x8, A
77 ; CHECK-NEXT: mov w0, w19
78 ; CHECK-NEXT: ldr w20, [x8, :lo12:A]
79 ; CHECK-NEXT: bl _Z3usei
80 ; CHECK-NEXT: mov w21, w19
81 ; CHECK-NEXT: .LBB1_2: // %for.body
82 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
83 ; CHECK-NEXT: mov w0, w20
84 ; CHECK-NEXT: bl _Z3usei
85 ; CHECK-NEXT: sdiv w21, w21, w0
86 ; CHECK-NEXT: subs w19, w19, #1
87 ; CHECK-NEXT: b.ne .LBB1_2
88 ; CHECK-NEXT: b .LBB1_4
89 ; CHECK-NEXT: .LBB1_3:
90 ; CHECK-NEXT: mov w21, w19
91 ; CHECK-NEXT: .LBB1_4: // %for.cond.cleanup
92 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
93 ; CHECK-NEXT: mov w0, w21
94 ; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; Enter the loop only when %n > 0; otherwise branch straight to
; for.cond.cleanup.
97 %cmp63 = icmp sgt i32 %n, 0
98 br i1 %cmp63, label %for.body.preheader, label %for.cond.cleanup
; Load of the first element of @A, followed by a call whose result (%call0) is
; not used by any instruction visible here.
101 %0 = load i32, ptr @A, align 4
102 %call0 = tail call i32 @_Z3usei(i32 %n)
; Merged running value: %n when the loop is skipped, %div when coming from the
; loop.
106 %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ]
; %lsr.iv counts down from %n; %sum.065 carries the running quotient, which
; starts at %n.
110 %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
111 %sum.065 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ]
112 %call = tail call i32 @_Z3usei(i32 %0)
113 %div = sdiv i32 %sum.065, %call
; Decrement the counter and leave the loop once it reaches zero.
114 %lsr.iv.next = add i32 %lsr.iv, -1
115 %exitcond.not = icmp eq i32 %lsr.iv.next, 0
116 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
; cant_sink_successive_store: same loop shape as the other tests in this file,
; but the load of @A is directly followed by a store of 42 through %store
; before the loop starts. Judging by the function name, that intervening store
; is what is meant to keep the load from being moved into the loop.
; The autogenerated assertions below expect the load (ldr w20) ahead of the
; "str w8, [x0]" in the %for.body.preheader block, with the loop reading the
; value from w20. Here %n arrives in w1 and %store in x0.
; NOTE(review): %store carries the readnone attribute yet is written through by
; the store below - confirm this combination is intentional.
; NOTE(review): the basic-block labels, the return and the closing brace are
; not present in this view of the function - confirm against the full file.
119 define i32 @cant_sink_successive_store(ptr nocapture readnone %store, i32 %n) {
120 ; CHECK-LABEL: cant_sink_successive_store:
121 ; CHECK: // %bb.0: // %entry
122 ; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
123 ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
124 ; CHECK-NEXT: .cfi_def_cfa_offset 32
125 ; CHECK-NEXT: .cfi_offset w19, -8
126 ; CHECK-NEXT: .cfi_offset w20, -16
127 ; CHECK-NEXT: .cfi_offset w21, -24
128 ; CHECK-NEXT: .cfi_offset w30, -32
129 ; CHECK-NEXT: mov w19, w1
130 ; CHECK-NEXT: cmp w1, #1
131 ; CHECK-NEXT: b.lt .LBB2_3
132 ; CHECK-NEXT: // %bb.1: // %for.body.preheader
133 ; CHECK-NEXT: adrp x8, A
134 ; CHECK-NEXT: mov w21, w19
135 ; CHECK-NEXT: ldr w20, [x8, :lo12:A]
136 ; CHECK-NEXT: mov w8, #42 // =0x2a
137 ; CHECK-NEXT: str w8, [x0]
138 ; CHECK-NEXT: .LBB2_2: // %for.body
139 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
140 ; CHECK-NEXT: mov w0, w20
141 ; CHECK-NEXT: bl _Z3usei
142 ; CHECK-NEXT: sdiv w21, w21, w0
143 ; CHECK-NEXT: subs w19, w19, #1
144 ; CHECK-NEXT: b.ne .LBB2_2
145 ; CHECK-NEXT: b .LBB2_4
146 ; CHECK-NEXT: .LBB2_3:
147 ; CHECK-NEXT: mov w21, w19
148 ; CHECK-NEXT: .LBB2_4: // %for.cond.cleanup
149 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
150 ; CHECK-NEXT: mov w0, w21
151 ; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; Enter the loop only when %n > 0; otherwise branch straight to
; for.cond.cleanup.
154 %cmp63 = icmp sgt i32 %n, 0
155 br i1 %cmp63, label %for.body.preheader, label %for.cond.cleanup
; Load of the first element of @A, followed by a store of the constant 42
; through the %store pointer argument.
158 %0 = load i32, ptr @A, align 4
159 store i32 42, ptr %store, align 4
; Merged running value: %n when the loop is skipped, %div when coming from the
; loop.
163 %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ]
; %lsr.iv counts down from %n; %sum.065 carries the running quotient, which
; starts at %n.
167 %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
168 %sum.065 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ]
169 %call = tail call i32 @_Z3usei(i32 %0)
170 %div = sdiv i32 %sum.065, %call
; Decrement the counter and leave the loop once it reaches zero.
171 %lsr.iv.next = add i32 %lsr.iv, -1
172 %exitcond.not = icmp eq i32 %lsr.iv.next, 0
173 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
; External function taking and returning an i32, called by every test function
; in this file. Its body is not visible here, so its effects on memory are
; unknown from this file. The symbol is the Itanium C++ mangling of "use(int)".
176 declare i32 @_Z3usei(i32)