1 ; REQUIRES: x86_64-linux
3 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-matching.prof --salvage-stale-profile -S --debug-only=sample-profile,sample-profile-impl 2>&1 | FileCheck %s
5 ; The profiled source code:
8 ; __attribute__((noinline)) int bar(int p) {
12 ; __attribute__((always_inline)) int foo(int i, int p) {
13 ; if (i % 10) return bar(p);
14 ; else return bar(p + 1);
18 ; for (int i = 0; i < 1000 * 1000; i++) {
26 ; The source code for the current build:
29 ; __attribute__((noinline)) int bar(int p) {
33 ; __attribute__((always_inline)) int foo(int i, int p) {
34 ; if (i % 10) return bar(p);
35 ; else return bar(p + 1);
39 ; if (x == 0) // code change
40 ; return 0; // code change
41 ; for (int i = 0; i < 1000 * 1000; i++) {
44 ; if (i < 0) // code change
45 ; return 0; // code change
52 ; CHECK: Run stale profile matching for main
54 ; CHECK: Location is matched from 1 to 1
55 ; CHECK: Location is matched from 2 to 2
56 ; CHECK: Location is matched from 3 to 3
57 ; CHECK: Location is matched from 4 to 4
58 ; CHECK: Location is matched from 5 to 5
59 ; CHECK: Location is matched from 6 to 6
60 ; CHECK: Location is matched from 7 to 7
61 ; CHECK: Location is matched from 8 to 8
62 ; CHECK: Location is matched from 9 to 9
63 ; CHECK: Location is matched from 10 to 10
64 ; CHECK: Location is matched from 11 to 11
66 ; CHECK: Callsite with callee:foo is matched from 13 to 6
67 ; CHECK: Location is rematched backwards from 7 to 0
68 ; CHECK: Location is rematched backwards from 8 to 1
69 ; CHECK: Location is rematched backwards from 9 to 2
70 ; CHECK: Location is rematched backwards from 10 to 3
71 ; CHECK: Location is rematched backwards from 11 to 4
72 ; CHECK: Callsite with callee:bar is matched from 14 to 7
73 ; CHECK: Callsite with callee:foo is matched from 15 to 8
74 ; CHECK: Callsite with callee:bar is matched from 16 to 9
77 ; CHECK: 2: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00)
78 ; CHECK: 3: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00)
79 ; CHECK: 4: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg ![[#]] - weight: 116 - factor: 1.00)
80 ; CHECK: 5: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 5, i32 0, i64 -1), !dbg ![[#]] - weight: 0 - factor: 1.00)
81 ; CHECK: 1: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00)
82 ; CHECK: 2: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1), !dbg ![[#]] - weight: 101 - factor: 1.00)
83 ; CHECK: 5: %call.i8 = call i32 @bar(i32 noundef %1), !dbg ![[#]] - weight: 101 - factor: 1.00)
84 ; CHECK: 3: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 3, i32 0, i64 -1), !dbg ![[#]] - weight: 13 - factor: 1.00)
85 ; CHECK: 6: %call1.i5 = call i32 @bar(i32 noundef %add.i4), !dbg ![[#]] - weight: 13 - factor: 1.00)
86 ; CHECK: 4: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00)
87 ; CHECK: 14: %call2 = call i32 @bar(i32 noundef %3), !dbg ![[#]] - weight: 124 - factor: 1.00)
88 ; CHECK: 8: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 8, i32 0, i64 -1), !dbg ![[#]] - weight: 0 - factor: 1.00)
89 ; CHECK: 1: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1), !dbg ![[#]] - weight: 117 - factor: 1.00)
90 ; CHECK: 2: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1), !dbg ![[#]] - weight: 104 - factor: 1.00)
91 ; CHECK: 5: %call.i = call i32 @bar(i32 noundef %5), !dbg ![[#]] - weight: 104 - factor: 1.00)
92 ; CHECK: 3: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 3, i32 0, i64 -1), !dbg ![[#]] - weight: 13 - factor: 1.00)
93 ; CHECK: 6: %call1.i = call i32 @bar(i32 noundef %add.i), !dbg ![[#]] - weight: 14 - factor: 1.00)
94 ; CHECK: 4: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg ![[#]] - weight: 121 - factor: 1.00)
95 ; CHECK: 16: %call9 = call i32 @bar(i32 noundef %7), !dbg ![[#]] - weight: 126 - factor: 1.00)
96 ; CHECK: 9: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 9, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00)
97 ; CHECK: 10: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 10, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00)
98 ; CHECK: 11: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 11, i32 0, i64 -1), !dbg ![[#]] - weight: 116 - factor: 1.00)
99 ; CHECK: 1: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg ![[#]] - weight: 0 - factor: 1.00)
; Target configuration for the module (x86-64 Linux, matching the REQUIRES line).
102 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
103 target triple = "x86_64-unknown-linux-gnu"
; @x: global i32 initialised to 1. Every access to it in the visible part of
; @main is a volatile load or store.
105 @x = dso_local global i32 1, align 4, !dbg !0
107 ; Function Attrs: noinline nounwind uwtable
; @bar: takes one i32 and returns i32. Attribute group #0 marks it noinline, so
; the calls to it from @foo and @main remain real call instructions.
108 define dso_local i32 @bar(i32 noundef %p) #0 !dbg !16 {
110 call void @llvm.dbg.value(metadata i32 %p, metadata !20, metadata !DIExpression()), !dbg !21
; Probe 1 of bar. The first i64 is bar's function id; the same value is the
; first field of bar's !llvm.pseudo_probe_desc entry (!13).
111 call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1), !dbg !22
115 ; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
; Declaration only: the function bodies visible in this file call
; @llvm.dbg.value, not @llvm.dbg.declare.
116 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
118 ; Function Attrs: alwaysinline nounwind uwtable
; @foo(i, p): calls bar(p) when i % 10 != 0, otherwise bar(p + 1), and returns
; the callee's result. Attribute group #2 marks it alwaysinline.
; It has four block probes (ids 1-4), all tagged with foo's function id
; 6699318081062747564.
119 define dso_local i32 @foo(i32 noundef %i, i32 noundef %p) #2 !dbg !24 {
121 call void @llvm.dbg.value(metadata i32 %i, metadata !28, metadata !DIExpression()), !dbg !30
122 call void @llvm.dbg.value(metadata i32 %p, metadata !29, metadata !DIExpression()), !dbg !30
; Probe 1: function entry.
123 call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1), !dbg !31
; %tobool is true when the signed remainder i % 10 is non-zero.
124 %rem = srem i32 %i, 10, !dbg !33
125 %tobool = icmp ne i32 %rem, 0, !dbg !33
126 br i1 %tobool, label %if.then, label %if.else, !dbg !34
128 if.then: ; preds = %entry
; Probe 2: the i % 10 != 0 path, which calls bar(p).
129 call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1), !dbg !35
130 %call = call i32 @bar(i32 noundef %p), !dbg !36
131 br label %return, !dbg !38
133 if.else: ; preds = %entry
; Probe 3: the i % 10 == 0 path, which calls bar(p + 1).
134 call void @llvm.pseudoprobe(i64 6699318081062747564, i64 3, i32 0, i64 -1), !dbg !39
135 %add = add nsw i32 %p, 1, !dbg !40
136 %call1 = call i32 @bar(i32 noundef %add), !dbg !41
137 br label %return, !dbg !43
139 return: ; preds = %if.else, %if.then
; Merge the result of whichever bar call ran, then probe 4 and return it.
140 %retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ], !dbg !44
141 call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg !45
142 ret i32 %retval.0, !dbg !45
145 ; Function Attrs: nounwind uwtable
; @main: the function named in the "Run stale profile matching for main" CHECK
; line. It has block probes 1-11, all tagged with main's function id
; -2624081020897602054, and calls @foo and @bar twice each inside the loop.
; NOTE: the CHECK lines at the top of the file match %call2 and %call9 by name,
; so the value names in this function must not be changed.
146 define dso_local i32 @main() #3 !dbg !46 {
; Probe 1: function entry. Branch to if.then when x == 0.
148 call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !52
149 %0 = load volatile i32, ptr @x, align 4, !dbg !52, !tbaa !54
150 %cmp = icmp eq i32 %0, 0, !dbg !58
151 br i1 %cmp, label %if.then, label %if.end, !dbg !59
153 if.then: ; preds = %entry
; Probe 2: x == 0, so skip the loop and jump straight to for.end.
154 call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !60
155 br label %for.end, !dbg !60
157 if.end: ; preds = %entry
; Probe 3: x != 0, so enter the loop with i = 0.
158 call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !61
159 call void @llvm.dbg.value(metadata i32 0, metadata !50, metadata !DIExpression()), !dbg !62
160 br label %for.cond, !dbg !63
162 for.cond: ; preds = %if.end6, %if.end
; Loop header: %i.0 is the induction variable. Probe 4 guards the test
; i < 1000000.
163 %i.0 = phi i32 [ 0, %if.end ], [ %inc, %if.end6 ], !dbg !64
164 call void @llvm.dbg.value(metadata i32 %i.0, metadata !50, metadata !DIExpression()), !dbg !62
165 call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !65
166 %cmp1 = icmp slt i32 %i.0, 1000000, !dbg !67
167 br i1 %cmp1, label %for.body, label %for.cond.cleanup, !dbg !68
169 for.cond.cleanup: ; preds = %for.cond
; Probe 5: normal loop exit.
170 call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 5, i32 0, i64 -1), !dbg !68
171 br label %cleanup, !dbg !68
173 for.body: ; preds = %for.cond
; Probe 6: first half of the loop body. Computes x += foo(i, x), then
; x += bar(x), re-reading x with a volatile load each time.
174 call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !69
175 %1 = load volatile i32, ptr @x, align 4, !dbg !71, !tbaa !54
176 %call = call i32 @foo(i32 noundef %i.0, i32 noundef %1), !dbg !72
177 %2 = load volatile i32, ptr @x, align 4, !dbg !74, !tbaa !54
178 %add = add nsw i32 %2, %call, !dbg !74
179 store volatile i32 %add, ptr @x, align 4, !dbg !74, !tbaa !54
180 %3 = load volatile i32, ptr @x, align 4, !dbg !75, !tbaa !54
181 %call2 = call i32 @bar(i32 noundef %3), !dbg !76
182 %4 = load volatile i32, ptr @x, align 4, !dbg !78, !tbaa !54
183 %add3 = add nsw i32 %4, %call2, !dbg !78
184 store volatile i32 %add3, ptr @x, align 4, !dbg !78, !tbaa !54
; The condition is the constant false, so if.then5 is never taken at run time.
; NOTE(review): presumably the folded `if (i < 0)` early return from the
; current-build source in the header comment -- confirm.
185 br i1 false, label %if.then5, label %if.end6, !dbg !79
187 if.then5: ; preds = %for.body
; Probe 7: early-exit path out of the loop body.
188 call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !80
189 br label %cleanup, !dbg !80
191 if.end6: ; preds = %for.body
; Probe 8: second half of the loop body, repeating the foo/bar updates of x.
192 call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 8, i32 0, i64 -1), !dbg !82
193 %5 = load volatile i32, ptr @x, align 4, !dbg !83, !tbaa !54
194 %call7 = call i32 @foo(i32 noundef %i.0, i32 noundef %5), !dbg !84
195 %6 = load volatile i32, ptr @x, align 4, !dbg !86, !tbaa !54
196 %add8 = add nsw i32 %6, %call7, !dbg !86
197 store volatile i32 %add8, ptr @x, align 4, !dbg !86, !tbaa !54
198 %7 = load volatile i32, ptr @x, align 4, !dbg !87, !tbaa !54
199 %call9 = call i32 @bar(i32 noundef %7), !dbg !88
200 %8 = load volatile i32, ptr @x, align 4, !dbg !90, !tbaa !54
201 %add10 = add nsw i32 %8, %call9, !dbg !90
202 store volatile i32 %add10, ptr @x, align 4, !dbg !90, !tbaa !54
; Probe 9: loop increment (i + 1), followed by the back edge to for.cond.
203 call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 9, i32 0, i64 -1), !dbg !91
204 %inc = add nsw i32 %i.0, 1, !dbg !91
205 call void @llvm.dbg.value(metadata i32 %inc, metadata !50, metadata !DIExpression()), !dbg !62
206 br label %for.cond, !dbg !92, !llvm.loop !93
208 cleanup: ; preds = %if.then5, %for.cond.cleanup
; Probe 10: join point for both ways of leaving the loop.
209 call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 10, i32 0, i64 -1), !dbg !96
212 for.end: ; preds = %cleanup, %if.then
; Probe 11: common exit block, reached from cleanup or from the x == 0 path.
213 call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 11, i32 0, i64 -1), !dbg !97
217 ; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
; The lifetime and dbg.assign intrinsics below are declared, but no call to
; them appears in the visible function bodies.
218 declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #4
220 ; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
221 declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #4
223 ; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
224 declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) #1
226 ; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
; Pseudo-probe intrinsic called in @bar, @foo and @main. In every call in this
; file the first argument is the enclosing function's id and the second is the
; probe index within that function.
227 declare void @llvm.pseudoprobe(i64, i64, i32, i64) #5
229 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
; Debug-value intrinsic used by all three function bodies.
230 declare void @llvm.dbg.value(metadata, metadata, metadata) #6
; Attribute groups. #0, #2 and #3 belong to @bar, @foo and @main respectively.
; They differ only in the inlining attribute (noinline / alwaysinline / none),
; and all three carry "use-sample-profile".
232 attributes #0 = { noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
; #1 is used by the @llvm.dbg.declare and @llvm.dbg.assign declarations.
233 attributes #1 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
234 attributes #2 = { alwaysinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
235 attributes #3 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
; #4: lifetime intrinsics. #5: @llvm.pseudoprobe. #6: @llvm.dbg.value.
236 attributes #4 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
237 attributes #5 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
238 attributes #6 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; Module-level flags; the individual entries are !7-!11 below.
241 !llvm.module.flags = !{!7, !8, !9, !10, !11}
; One pseudo-probe descriptor per function (!13 bar, !14 foo, !15 main).
243 !llvm.pseudo_probe_desc = !{!13, !14, !15}
; Debug info for the global @x: a volatile int declared on line 1 of test.c.
245 !0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
246 !1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
247 !2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 17.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
248 !3 = !DIFile(filename: "test.c", directory: "path")
250 !5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
251 !6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
; Module flag values referenced from !llvm.module.flags.
252 !7 = !{i32 7, !"Dwarf Version", i32 5}
253 !8 = !{i32 2, !"Debug Info Version", i32 3}
254 !9 = !{i32 1, !"wchar_size", i32 4}
255 !10 = !{i32 7, !"uwtable", i32 2}
256 !11 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
257 !12 = !{!"clang version 17.0.0"}
; Probe descriptors: the first i64 equals the function id passed as the first
; argument of every @llvm.pseudoprobe call in that function.
; NOTE(review): the second i64 is presumably the function hash used to detect
; a stale profile -- confirm against the pseudo-probe documentation.
258 !13 = !{i64 -2012135647395072713, i64 4294967295, !"bar"}
259 !14 = !{i64 6699318081062747564, i64 563022570642068, !"foo"}
260 !15 = !{i64 -2624081020897602054, i64 1126158552146340, !"main"}
; Debug info for @bar (subprogram !16, line 2 of test.c).
261 !16 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 2, type: !17, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !19)
262 !17 = !DISubroutineType(types: !18)
; !20 describes parameter p. !22 is the location attached to bar's probe 1.
265 !20 = !DILocalVariable(name: "p", arg: 1, scope: !16, file: !3, line: 2, type: !6)
266 !21 = !DILocation(line: 0, scope: !16)
267 !22 = !DILocation(line: 3, column: 10, scope: !16)
268 !23 = !DILocation(line: 3, column: 3, scope: !16)
; Debug info for @foo (subprogram !24, line 6 of test.c).
269 !24 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 6, type: !25, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !27)
270 !25 = !DISubroutineType(types: !26)
; Parameters i and p.
273 !28 = !DILocalVariable(name: "i", arg: 1, scope: !24, file: !3, line: 6, type: !6)
274 !29 = !DILocalVariable(name: "p", arg: 2, scope: !24, file: !3, line: 6, type: !6)
275 !30 = !DILocation(line: 0, scope: !24)
; Source line 7: the i % 10 test and the if.then path.
276 !31 = !DILocation(line: 7, column: 6, scope: !32)
277 !32 = distinct !DILexicalBlock(scope: !24, file: !3, line: 7, column: 6)
278 !33 = !DILocation(line: 7, column: 8, scope: !32)
279 !34 = !DILocation(line: 7, column: 6, scope: !24)
280 !35 = !DILocation(line: 7, column: 26, scope: !32)
; !36 is the location of the call to @bar in if.then; its scope !37 carries a
; discriminator.
; NOTE(review): the discriminator presumably encodes the call-site probe
; (CHECK lines list this call as probe 5) -- confirm.
281 !36 = !DILocation(line: 7, column: 22, scope: !37)
282 !37 = !DILexicalBlockFile(scope: !32, file: !3, discriminator: 186646575)
283 !38 = !DILocation(line: 7, column: 14, scope: !32)
; Source line 8: the if.else path. !41 is the location of the second call to
; @bar and is scoped by the discriminator block !42.
284 !39 = !DILocation(line: 8, column: 19, scope: !32)
285 !40 = !DILocation(line: 8, column: 21, scope: !32)
286 !41 = !DILocation(line: 8, column: 15, scope: !42)
287 !42 = !DILexicalBlockFile(scope: !32, file: !3, discriminator: 186646583)
288 !43 = !DILocation(line: 8, column: 8, scope: !32)
289 !44 = !DILocation(line: 0, scope: !32)
; !45 is shared by probe 4 and the ret in the return block.
290 !45 = !DILocation(line: 9, column: 1, scope: !24)
; Debug info for @main (subprogram !46, line 11 of test.c).
291 !46 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !47, scopeLine: 11, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !49)
292 !47 = !DISubroutineType(types: !48)
; Loop variable i, declared on source line 14.
295 !50 = !DILocalVariable(name: "i", scope: !51, file: !3, line: 14, type: !6)
296 !51 = distinct !DILexicalBlock(scope: !46, file: !3, line: 14, column: 3)
; Source line 12: the x == 0 test at function entry.
297 !52 = !DILocation(line: 12, column: 6, scope: !53)
298 !53 = distinct !DILexicalBlock(scope: !46, file: !3, line: 12, column: 6)
; TBAA nodes attached (as !tbaa !54) to the volatile loads and stores of @x.
299 !54 = !{!55, !55, i64 0}
300 !55 = !{!"int", !56, i64 0}
301 !56 = !{!"omnipotent char", !57, i64 0}
302 !57 = !{!"Simple C/C++ TBAA"}
303 !58 = !DILocation(line: 12, column: 8, scope: !53)
304 !59 = !DILocation(line: 12, column: 6, scope: !46)
305 !60 = !DILocation(line: 13, column: 5, scope: !53)
; Source line 14: loop initialisation, condition and exit.
306 !61 = !DILocation(line: 14, column: 11, scope: !51)
307 !62 = !DILocation(line: 0, scope: !51)
308 !63 = !DILocation(line: 14, column: 7, scope: !51)
309 !64 = !DILocation(line: 14, scope: !51)
310 !65 = !DILocation(line: 14, column: 18, scope: !66)
311 !66 = distinct !DILexicalBlock(scope: !51, file: !3, line: 14, column: 3)
312 !67 = !DILocation(line: 14, column: 20, scope: !66)
313 !68 = !DILocation(line: 14, column: 3, scope: !51)
; Source lines 15-16: first foo/bar pair in the loop body. The call locations
; (!72 for %call, !76 for %call2) are scoped by discriminator blocks !73 and !77.
314 !69 = !DILocation(line: 15, column: 15, scope: !70)
315 !70 = distinct !DILexicalBlock(scope: !66, file: !3, line: 14, column: 40)
316 !71 = !DILocation(line: 15, column: 18, scope: !70)
317 !72 = !DILocation(line: 15, column: 11, scope: !73)
318 !73 = !DILexicalBlockFile(scope: !70, file: !3, discriminator: 186646639)
319 !74 = !DILocation(line: 15, column: 8, scope: !70)
320 !75 = !DILocation(line: 16, column: 15, scope: !70)
321 !76 = !DILocation(line: 16, column: 11, scope: !77)
322 !77 = !DILexicalBlockFile(scope: !70, file: !3, discriminator: 186646647)
323 !78 = !DILocation(line: 16, column: 8, scope: !70)
; Source lines 17-18: the constant-false branch (!79) and the if.then5 block (!80).
324 !79 = !DILocation(line: 17, column: 9, scope: !70)
325 !80 = !DILocation(line: 18, column: 8, scope: !81)
326 !81 = distinct !DILexicalBlock(scope: !70, file: !3, line: 17, column: 9)
; Source lines 19-20: second foo/bar pair. The call locations (!84 for %call7,
; !88 for %call9) are scoped by discriminator blocks !85 and !89.
327 !82 = !DILocation(line: 19, column: 15, scope: !70)
328 !83 = !DILocation(line: 19, column: 18, scope: !70)
329 !84 = !DILocation(line: 19, column: 11, scope: !85)
330 !85 = !DILexicalBlockFile(scope: !70, file: !3, discriminator: 186646655)
331 !86 = !DILocation(line: 19, column: 8, scope: !70)
332 !87 = !DILocation(line: 20, column: 15, scope: !70)
333 !88 = !DILocation(line: 20, column: 11, scope: !89)
334 !89 = !DILexicalBlockFile(scope: !70, file: !3, discriminator: 186646663)
335 !90 = !DILocation(line: 20, column: 8, scope: !70)
; Loop increment (!91) and the back-edge branch (!92).
336 !91 = !DILocation(line: 14, column: 36, scope: !66)
337 !92 = !DILocation(line: 14, column: 3, scope: !66)
; Loop metadata attached to the back-edge branch via !llvm.loop !93.
338 !93 = distinct !{!93, !68, !94, !95}
339 !94 = !DILocation(line: 21, column: 3, scope: !51)
340 !95 = !{!"llvm.loop.mustprogress"}
; Locations of probe 10 (cleanup) and probe 11 (for.end).
341 !96 = !DILocation(line: 0, scope: !46)
342 !97 = !DILocation(line: 22, column: 1, scope: !46)