1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
2 ; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
4 ; Here we have multiple exits, and different source regions need different
5 ; outputs; this checks that each exit is handled by a separate switch statement.
; First input function for the outliner. NOTE(review): the block labels,
; branch/ret terminators and closing brace are not visible in this excerpt;
; the CHECK lines for @outline_outputs1 expect two exits, one holding the udiv
; and one holding the sub, each ending in `ret void`.
7 define void @outline_outputs1() #0 {
; Stack slots. %output2 and %result2 are not referenced by any instruction
; visible below.
9 %output = alloca i32, align 4
10 %result = alloca i32, align 4
11 %output2 = alloca i32, align 4
12 %result2 = alloca i32, align 4
13 %a = alloca i32, align 4
14 %b = alloca i32, align 4
; %a2 and %b2 are allocated but never read: the loads below use %a and %b.
17 %a2 = alloca i32, align 4
18 %b2 = alloca i32, align 4
; Load %a and %b, then compute 2 + a and 2 * b (the "2"-suffixed values).
21 %a2val = load i32, i32* %a
22 %b2val = load i32, i32* %b
23 %add2 = add i32 2, %a2val
24 %mul2 = mul i32 2, %b2val
; Same loads and arithmetic again, producing the unsuffixed values.
27 %aval = load i32, i32* %a
28 %bval = load i32, i32* %b
29 %add = add i32 2, %aval
30 %mul = mul i32 2, %bval
; Store %add and %mul into %output and %result.
33 store i32 %add, i32* %output, align 4
34 store i32 %mul, i32* %result, align 4
; Store %add2 and %mul2 into the same %output and %result slots.
37 store i32 %add2, i32* %output, align 4
38 store i32 %mul2, i32* %result, align 4
; This udiv uses the loaded values %aval and %bval; the CHECK lines expect them
; to be reloaded from [[AVAL_LOC]] and [[BVAL_LOC]] after the outlined call.
41 %div = udiv i32 %aval, %bval
; This sub uses %a2val and %b2val; the CHECK lines expect them to be reloaded
; from [[A2VAL_LOC]] and [[B2VAL_LOC]] after the outlined call.
44 %sub = sub i32 %a2val, %b2val
; Second input function for the outliner. Its allocas, loads, arithmetic and
; stores match @outline_outputs1 line for line; only the two trailing
; instructions differ (the udiv here consumes %add and %mul instead of %aval
; and %bval). NOTE(review): the block labels, branch/ret terminators and
; closing brace are not visible in this excerpt.
48 define void @outline_outputs2() #0 {
; Stack slots. %output2 and %result2 are not referenced by any instruction
; visible below.
50 %output = alloca i32, align 4
51 %result = alloca i32, align 4
52 %output2 = alloca i32, align 4
53 %result2 = alloca i32, align 4
54 %a = alloca i32, align 4
55 %b = alloca i32, align 4
; %a2 and %b2 are allocated but never read: the loads below use %a and %b.
58 %a2 = alloca i32, align 4
59 %b2 = alloca i32, align 4
; Load %a and %b, then compute 2 + a and 2 * b (the "2"-suffixed values).
62 %a2val = load i32, i32* %a
63 %b2val = load i32, i32* %b
64 %add2 = add i32 2, %a2val
65 %mul2 = mul i32 2, %b2val
; Same loads and arithmetic again, producing the unsuffixed values.
68 %aval = load i32, i32* %a
69 %bval = load i32, i32* %b
70 %add = add i32 2, %aval
71 %mul = mul i32 2, %bval
; Store %add and %mul into %output and %result.
74 store i32 %add, i32* %output, align 4
75 store i32 %mul, i32* %result, align 4
; Store %add2 and %mul2 into the same %output and %result slots.
78 store i32 %add2, i32* %output, align 4
79 store i32 %mul2, i32* %result, align 4
; This sub uses %a2val and %b2val; the CHECK lines expect them to be reloaded
; from [[A2VAL_LOC]] and [[B2VAL_LOC]] after the outlined call.
82 %diff = sub i32 %a2val, %b2val
; This udiv uses the computed %add and %mul; the CHECK lines expect them to be
; reloaded from [[ADD_LOC]] and [[MUL_LOC]] after the outlined call.
85 %quot = udiv i32 %add, %mul
88 ; CHECK-LABEL: @outline_outputs1(
90 ; CHECK-NEXT: [[BVAL_LOC:%.*]] = alloca i32, align 4
91 ; CHECK-NEXT: [[AVAL_LOC:%.*]] = alloca i32, align 4
92 ; CHECK-NEXT: [[B2VAL_LOC:%.*]] = alloca i32, align 4
93 ; CHECK-NEXT: [[A2VAL_LOC:%.*]] = alloca i32, align 4
94 ; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4
95 ; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4
96 ; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4
97 ; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4
98 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
99 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
100 ; CHECK-NEXT: br label [[BLOCK_2:%.*]]
102 ; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4
103 ; CHECK-NEXT: [[B2:%.*]] = alloca i32, align 4
104 ; CHECK-NEXT: br label [[BLOCK_2]]
106 ; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[A2VAL_LOC]] to i8*
107 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
108 ; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[B2VAL_LOC]] to i8*
109 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
110 ; CHECK-NEXT: [[LT_CAST2:%.*]] = bitcast i32* [[AVAL_LOC]] to i8*
111 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST2]])
112 ; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[BVAL_LOC]] to i8*
113 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
114 ; CHECK-NEXT: [[TMP0:%.*]] = call i1 @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[A2VAL_LOC]], i32* [[B2VAL_LOC]], i32* [[AVAL_LOC]], i32* [[BVAL_LOC]], i32 0)
115 ; CHECK-NEXT: [[A2VAL_RELOAD:%.*]] = load i32, i32* [[A2VAL_LOC]], align 4
116 ; CHECK-NEXT: [[B2VAL_RELOAD:%.*]] = load i32, i32* [[B2VAL_LOC]], align 4
117 ; CHECK-NEXT: [[AVAL_RELOAD:%.*]] = load i32, i32* [[AVAL_LOC]], align 4
118 ; CHECK-NEXT: [[BVAL_RELOAD:%.*]] = load i32, i32* [[BVAL_LOC]], align 4
119 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
120 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
121 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST2]])
122 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST3]])
123 ; CHECK-NEXT: br i1 [[TMP0]], label [[BLOCK_6:%.*]], label [[BLOCK_7:%.*]]
125 ; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[AVAL_RELOAD]], [[BVAL_RELOAD]]
126 ; CHECK-NEXT: ret void
128 ; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[A2VAL_RELOAD]], [[B2VAL_RELOAD]]
129 ; CHECK-NEXT: ret void
132 ; CHECK-LABEL: @outline_outputs2(
134 ; CHECK-NEXT: [[MUL_LOC:%.*]] = alloca i32, align 4
135 ; CHECK-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4
136 ; CHECK-NEXT: [[B2VAL_LOC:%.*]] = alloca i32, align 4
137 ; CHECK-NEXT: [[A2VAL_LOC:%.*]] = alloca i32, align 4
138 ; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4
139 ; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4
140 ; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4
141 ; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4
142 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
143 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
144 ; CHECK-NEXT: br label [[BLOCK_2:%.*]]
146 ; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4
147 ; CHECK-NEXT: [[B2:%.*]] = alloca i32, align 4
148 ; CHECK-NEXT: br label [[BLOCK_2]]
150 ; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[A2VAL_LOC]] to i8*
151 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
152 ; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[B2VAL_LOC]] to i8*
153 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
154 ; CHECK-NEXT: [[LT_CAST2:%.*]] = bitcast i32* [[ADD_LOC]] to i8*
155 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST2]])
156 ; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[MUL_LOC]] to i8*
157 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
158 ; CHECK-NEXT: [[TMP0:%.*]] = call i1 @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[A2VAL_LOC]], i32* [[B2VAL_LOC]], i32* [[ADD_LOC]], i32* [[MUL_LOC]], i32 1)
159 ; CHECK-NEXT: [[A2VAL_RELOAD:%.*]] = load i32, i32* [[A2VAL_LOC]], align 4
160 ; CHECK-NEXT: [[B2VAL_RELOAD:%.*]] = load i32, i32* [[B2VAL_LOC]], align 4
161 ; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
162 ; CHECK-NEXT: [[MUL_RELOAD:%.*]] = load i32, i32* [[MUL_LOC]], align 4
163 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
164 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
165 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST2]])
166 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST3]])
167 ; CHECK-NEXT: br i1 [[TMP0]], label [[BLOCK_7:%.*]], label [[BLOCK_6:%.*]]
169 ; CHECK-NEXT: [[DIFF:%.*]] = sub i32 [[A2VAL_RELOAD]], [[B2VAL_RELOAD]]
170 ; CHECK-NEXT: ret void
172 ; CHECK-NEXT: [[QUOT:%.*]] = udiv i32 [[ADD_RELOAD]], [[MUL_RELOAD]]
173 ; CHECK-NEXT: ret void
176 ; CHECK: define internal i1 @outlined_ir_func_0(
177 ; CHECK-NEXT: newFuncRoot:
178 ; CHECK-NEXT: br label [[BLOCK_2_TO_OUTLINE:%.*]]
179 ; CHECK: block_2_to_outline:
180 ; CHECK-NEXT: [[A2VAL:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
181 ; CHECK-NEXT: [[B2VAL:%.*]] = load i32, i32* [[TMP1:%.*]], align 4
182 ; CHECK-NEXT: [[ADD2:%.*]] = add i32 2, [[A2VAL]]
183 ; CHECK-NEXT: [[MUL2:%.*]] = mul i32 2, [[B2VAL]]
184 ; CHECK-NEXT: br label [[BLOCK_5:%.*]]
186 ; CHECK-NEXT: [[AVAL:%.*]] = load i32, i32* [[TMP0]], align 4
187 ; CHECK-NEXT: [[BVAL:%.*]] = load i32, i32* [[TMP1]], align 4
188 ; CHECK-NEXT: [[ADD:%.*]] = add i32 2, [[AVAL]]
189 ; CHECK-NEXT: [[MUL:%.*]] = mul i32 2, [[BVAL]]
190 ; CHECK-NEXT: br label [[BLOCK_4:%.*]]
192 ; CHECK-NEXT: store i32 [[ADD]], i32* [[TMP2:%.*]], align 4
193 ; CHECK-NEXT: store i32 [[MUL]], i32* [[TMP3:%.*]], align 4
194 ; CHECK-NEXT: br label [[BLOCK_6_EXITSTUB:%.*]]
196 ; CHECK-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4
197 ; CHECK-NEXT: store i32 [[MUL2]], i32* [[TMP3]], align 4
198 ; CHECK-NEXT: br label [[BLOCK_7_EXITSTUB:%.*]]
199 ; CHECK: block_6.exitStub:
200 ; CHECK-NEXT: switch i32 [[TMP8:%.*]], label [[FINAL_BLOCK_1:%.*]] [
201 ; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_0_1:%.*]]
202 ; CHECK-NEXT: i32 1, label [[OUTPUT_BLOCK_1_1:%.*]]
204 ; CHECK: block_7.exitStub:
205 ; CHECK-NEXT: switch i32 [[TMP8]], label [[FINAL_BLOCK_0:%.*]] [
206 ; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_0_0:%.*]]
207 ; CHECK-NEXT: i32 1, label [[OUTPUT_BLOCK_1_0:%.*]]
209 ; CHECK: output_block_0_0:
210 ; CHECK-NEXT: store i32 [[A2VAL]], i32* [[TMP4:%.*]], align 4
211 ; CHECK-NEXT: store i32 [[B2VAL]], i32* [[TMP5:%.*]], align 4
212 ; CHECK-NEXT: br label [[FINAL_BLOCK_0]]
213 ; CHECK: output_block_0_1:
214 ; CHECK-NEXT: store i32 [[AVAL]], i32* [[TMP6:%.*]], align 4
215 ; CHECK-NEXT: store i32 [[BVAL]], i32* [[TMP7:%.*]], align 4
216 ; CHECK-NEXT: br label [[FINAL_BLOCK_1]]
217 ; CHECK: output_block_1_0:
218 ; CHECK-NEXT: store i32 [[A2VAL]], i32* [[TMP4]], align 4
219 ; CHECK-NEXT: store i32 [[B2VAL]], i32* [[TMP5]], align 4
220 ; CHECK-NEXT: br label [[FINAL_BLOCK_0]]
221 ; CHECK: output_block_1_1:
222 ; CHECK-NEXT: store i32 [[ADD]], i32* [[TMP6]], align 4
223 ; CHECK-NEXT: store i32 [[MUL]], i32* [[TMP7]], align 4
224 ; CHECK-NEXT: br label [[FINAL_BLOCK_1]]
225 ; CHECK: final_block_0:
226 ; CHECK-NEXT: ret i1 false
227 ; CHECK: final_block_1:
228 ; CHECK-NEXT: ret i1 true