2 # This is a POC (proof of concept or piece of crap, take your pick) for reading the
3 # text representation of trace output related to page allocation. It makes an attempt
4 # to extract some high-level information on what is going on. The accuracy of the parser
5 # may vary considerably
7 # Example usage: trace-pagealloc-postprocess.pl < /sys/kernel/debug/tracing/trace_pipe
9 # --prepend-parent Report on the parent proc and PID
10 # --read-procstat If the trace lacks process info, get it from /proc
11 # --ignore-pid Aggregate processes of the same name together
13 # Copyright (c) IBM Corporation 2009
14 # Author: Mel Gorman <mel@csn.ul.ie>
19 use constant MM_PAGE_ALLOC
=> 1;
20 use constant MM_PAGE_FREE
=> 2;
21 use constant MM_PAGE_FREE_BATCHED
=> 3;
22 use constant MM_PAGE_PCPU_DRAIN
=> 4;
23 use constant MM_PAGE_ALLOC_ZONE_LOCKED
=> 5;
24 use constant MM_PAGE_ALLOC_EXTFRAG
=> 6;
25 use constant EVENT_UNKNOWN
=> 7;
27 # Constants used to track state
28 use constant STATE_PCPU_PAGES_DRAINED
=> 8;
29 use constant STATE_PCPU_PAGES_REFILLED
=> 9;
31 # High-level events extrapolated from tracepoints
32 use constant HIGH_PCPU_DRAINS
=> 10;
33 use constant HIGH_PCPU_REFILLS
=> 11;
34 use constant HIGH_EXT_FRAGMENT
=> 12;
35 use constant HIGH_EXT_FRAGMENT_SEVERE
=> 13;
36 use constant HIGH_EXT_FRAGMENT_MODERATE
=> 14;
37 use constant HIGH_EXT_FRAGMENT_CHANGED
=> 15;
42 my $opt_read_procstat;
43 my $opt_prepend_parent;
45 # Catch sigint and exit on request
46 my $sigint_report = 0;
48 my $sigint_pending = 0;
49 my $sigint_received = 0;
51 my $current_time = time;
52 if ($current_time - 2 > $sigint_received) {
53 print "SIGINT received, report pending. Hit ctrl-c again to exit\n";
57 print "Second SIGINT received quickly, exiting\n";
62 if ($sigint_exit > 3) {
63 print "Many SIGINTs received, exiting now without report\n";
67 $sigint_received = $current_time;
70 $SIG{INT
} = "sigint_handler";
72 # Parse command line options
74 'ignore-pid' => \
$opt_ignorepid,
75 'read-procstat' => \
$opt_read_procstat,
76 'prepend-parent' => \
$opt_prepend_parent,
79 # Defaults for dynamically discovered regex's
80 my $regex_fragdetails_default = 'page=([0-9a-f]*) pfn=([0-9]*) alloc_order=([-0-9]*) fallback_order=([-0-9]*) pageblock_order=([-0-9]*) alloc_migratetype=([-0-9]*) fallback_migratetype=([-0-9]*) fragmenting=([-0-9]) change_ownership=([-0-9])';
82 # Dyanically discovered regex
83 my $regex_fragdetails;
85 # Static regex used. Specified like this for readability and for use with /o
86 # (process_pid) (cpus ) ( time ) (tpoint ) (details)
87 my $regex_traceevent = '\s*([a-zA-Z0-9-]*)\s*(\[[0-9]*\])\s*([0-9.]*):\s*([a-zA-Z_]*):\s*(.*)';
88 my $regex_statname = '[-0-9]*\s\((.*)\).*';
89 my $regex_statppid = '[-0-9]*\s\(.*\)\s[A-Za-z]\s([0-9]*).*';
91 sub generate_traceevent_regex
{
96 # Read the event format or use the default
97 if (!open (FORMAT
, "/sys/kernel/debug/tracing/events/$event/format")) {
101 while (!eof(FORMAT
)) {
103 if ($line =~ /^print fmt:\s"(.*)",.*/) {
105 $regex =~ s/%p/\([0-9a-f]*\)/g;
106 $regex =~ s/%d/\([-0-9]*\)/g;
107 $regex =~ s/%lu/\([0-9]*\)/g;
112 # Verify fields are in the right order
114 foreach $tuple (split /\s/, $regex) {
115 my ($key, $value) = split(/=/, $tuple);
116 my $expected = shift;
117 if ($key ne $expected) {
118 print("WARNING: Format not as expected '$key' != '$expected'");
119 $regex =~ s/$key=\((.*)\)/$key=$1/;
124 die("Fewer fields than expected in format");
129 $regex_fragdetails = generate_traceevent_regex
("kmem/mm_page_alloc_extfrag",
130 $regex_fragdetails_default,
132 "alloc_order", "fallback_order", "pageblock_order",
133 "alloc_migratetype", "fallback_migratetype",
134 "fragmenting", "change_ownership");
136 sub read_statline
($) {
140 if (open(STAT
, "/proc/$pid/stat")) {
145 if ($statline eq '') {
146 $statline = "-1 (UNKNOWN_PROCESS_NAME) R 0";
152 sub guess_process_pid
($$) {
154 my $statline = $_[1];
160 if ($statline !~ /$regex_statname/o) {
161 die("Failed to math stat line for process name :: $statline");
166 sub parent_info
($$) {
168 my $statline = $_[1];
175 if ($statline !~ /$regex_statppid/o) {
176 die("Failed to match stat line process ppid:: $statline");
179 # Read the ppid stat line
181 return guess_process_pid
($ppid, read_statline
($ppid));
193 # Read each line of the event log
195 while ($traceevent = <STDIN
>) {
196 if ($traceevent =~ /$regex_traceevent/o) {
200 if ($opt_read_procstat || $opt_prepend_parent) {
201 $process_pid =~ /(.*)-([0-9]*)$/;
205 $statline = read_statline
($pid);
207 if ($opt_read_procstat && $process eq '') {
208 $process_pid = guess_process_pid
($pid, $statline);
211 if ($opt_prepend_parent) {
212 $process_pid = parent_info
($pid, $statline) . " :: $process_pid";
216 # Unnecessary in this script. Uncomment if required
223 # Perl Switch() sucks majorly
224 if ($tracepoint eq "mm_page_alloc") {
225 $perprocesspid{$process_pid}->{MM_PAGE_ALLOC
}++;
226 } elsif ($tracepoint eq "mm_page_free") {
227 $perprocesspid{$process_pid}->{MM_PAGE_FREE
}++
228 } elsif ($tracepoint eq "mm_page_free_batched") {
229 $perprocesspid{$process_pid}->{MM_PAGE_FREE_BATCHED
}++;
230 } elsif ($tracepoint eq "mm_page_pcpu_drain") {
231 $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN
}++;
232 $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED
}++;
233 } elsif ($tracepoint eq "mm_page_alloc_zone_locked") {
234 $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED
}++;
235 $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED
}++;
236 } elsif ($tracepoint eq "mm_page_alloc_extfrag") {
238 # Extract the details of the event now
242 my ($alloc_order, $fallback_order, $pageblock_order);
243 my ($alloc_migratetype, $fallback_migratetype);
244 my ($fragmenting, $change_ownership);
246 if ($details !~ /$regex_fragdetails/o) {
247 print "WARNING: Failed to parse mm_page_alloc_extfrag as expected\n";
251 $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG
}++;
255 $fallback_order = $4;
256 $pageblock_order = $5;
257 $alloc_migratetype = $6;
258 $fallback_migratetype = $7;
260 $change_ownership = $9;
263 $perprocesspid{$process_pid}->{HIGH_EXT_FRAG
}++;
264 if ($fallback_order <= 3) {
265 $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE
}++;
267 $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE
}++;
270 if ($change_ownership) {
271 $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED
}++;
274 $perprocesspid{$process_pid}->{EVENT_UNKNOWN
}++;
277 # Catch a full pcpu drain event
278 if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED
} &&
279 $tracepoint ne "mm_page_pcpu_drain") {
281 $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS
}++;
282 $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED
} = 0;
285 # Catch a full pcpu refill event
286 if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED
} &&
287 $tracepoint ne "mm_page_alloc_zone_locked") {
288 $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS
}++;
289 $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED
} = 0;
292 if ($sigint_pending) {
300 my %stats = %$hashref;
302 # Dump per-process stats
306 # Get the maximum process name
307 foreach $process_pid (keys %perprocesspid) {
308 my $len = length($process_pid);
309 if ($len > $max_strlen) {
316 printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n",
317 "Process", "Pages", "Pages", "Pages", "Pages", "PCPU", "PCPU", "PCPU", "Fragment", "Fragment", "MigType", "Fragment", "Fragment", "Unknown");
318 printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n",
319 "details", "allocd", "allocd", "freed", "freed", "pages", "drains", "refills", "Fallback", "Causing", "Changed", "Severe", "Moderate", "");
321 printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n",
322 "", "", "under lock", "direct", "pagevec", "drain", "", "", "", "", "", "", "", "");
324 foreach $process_pid (keys %stats) {
325 # Dump final aggregates
326 if ($stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED
}) {
327 $stats{$process_pid}->{HIGH_PCPU_DRAINS
}++;
328 $stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED
} = 0;
330 if ($stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED
}) {
331 $stats{$process_pid}->{HIGH_PCPU_REFILLS
}++;
332 $stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED
} = 0;
335 printf("%-" . $max_strlen . "s %8d %10d %8d %8d %8d %8d %8d %8d %8d %8d %8d %8d %8d\n",
337 $stats{$process_pid}->{MM_PAGE_ALLOC
},
338 $stats{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED
},
339 $stats{$process_pid}->{MM_PAGE_FREE
},
340 $stats{$process_pid}->{MM_PAGE_FREE_BATCHED
},
341 $stats{$process_pid}->{MM_PAGE_PCPU_DRAIN
},
342 $stats{$process_pid}->{HIGH_PCPU_DRAINS
},
343 $stats{$process_pid}->{HIGH_PCPU_REFILLS
},
344 $stats{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG
},
345 $stats{$process_pid}->{HIGH_EXT_FRAG
},
346 $stats{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED
},
347 $stats{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE
},
348 $stats{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE
},
349 $stats{$process_pid}->{EVENT_UNKNOWN
});
353 sub aggregate_perprocesspid
() {
358 foreach $process_pid (keys %perprocesspid) {
359 $process = $process_pid;
360 $process =~ s/-([0-9])*$//;
361 if ($process eq '') {
362 $process = "NO_PROCESS_NAME";
365 $perprocess{$process}->{MM_PAGE_ALLOC
} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC
};
366 $perprocess{$process}->{MM_PAGE_ALLOC_ZONE_LOCKED
} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED
};
367 $perprocess{$process}->{MM_PAGE_FREE
} += $perprocesspid{$process_pid}->{MM_PAGE_FREE
};
368 $perprocess{$process}->{MM_PAGE_FREE_BATCHED
} += $perprocesspid{$process_pid}->{MM_PAGE_FREE_BATCHED
};
369 $perprocess{$process}->{MM_PAGE_PCPU_DRAIN
} += $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN
};
370 $perprocess{$process}->{HIGH_PCPU_DRAINS
} += $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS
};
371 $perprocess{$process}->{HIGH_PCPU_REFILLS
} += $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS
};
372 $perprocess{$process}->{MM_PAGE_ALLOC_EXTFRAG
} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG
};
373 $perprocess{$process}->{HIGH_EXT_FRAG
} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAG
};
374 $perprocess{$process}->{HIGH_EXT_FRAGMENT_CHANGED
} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED
};
375 $perprocess{$process}->{HIGH_EXT_FRAGMENT_SEVERE
} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE
};
376 $perprocess{$process}->{HIGH_EXT_FRAGMENT_MODERATE
} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE
};
377 $perprocess{$process}->{EVENT_UNKNOWN
} += $perprocesspid{$process_pid}->{EVENT_UNKNOWN
};
382 if (!$opt_ignorepid) {
383 dump_stats
(\
%perprocesspid);
385 aggregate_perprocesspid
();
386 dump_stats
(\
%perprocess);
390 # Process events or signals until neither is available
392 my $sigint_processed;
394 $sigint_processed = 0;
397 # Handle pending signals if any
398 if ($sigint_pending) {
399 my $current_time = time;
402 print "Received exit signal\n";
405 if ($sigint_report) {
406 if ($current_time >= $sigint_received + 2) {
410 $sigint_processed = 1;
414 } while ($sigint_pending || $sigint_processed);