|  | #!/usr/bin/env perl | 
|  | # This is a POC (proof of concept or piece of crap, take your pick) for reading the | 
|  | # text representation of trace output related to page allocation. It makes an attempt | 
|  | # to extract some high-level information on what is going on. The accuracy of the parser | 
|  | # may vary considerably | 
|  | # | 
|  | # Example usage: trace-pagealloc-postprocess.pl < /sys/kernel/debug/tracing/trace_pipe | 
|  | # other options | 
|  | #   --prepend-parent	Report on the parent proc and PID | 
|  | #   --read-procstat	If the trace lacks process info, get it from /proc | 
|  | #   --ignore-pid	Aggregate processes of the same name together | 
|  | # | 
|  | # Copyright (c) IBM Corporation 2009 | 
|  | # Author: Mel Gorman <mel@csn.ul.ie> | 
|  | use strict; | 
|  | use Getopt::Long; | 
|  |  | 
|  | # Tracepoint events | 
|  | use constant MM_PAGE_ALLOC		=> 1; | 
|  | use constant MM_PAGE_FREE		=> 2; | 
|  | use constant MM_PAGE_FREE_BATCHED	=> 3; | 
|  | use constant MM_PAGE_PCPU_DRAIN		=> 4; | 
|  | use constant MM_PAGE_ALLOC_ZONE_LOCKED	=> 5; | 
|  | use constant MM_PAGE_ALLOC_EXTFRAG	=> 6; | 
|  | use constant EVENT_UNKNOWN		=> 7; | 
|  |  | 
|  | # Constants used to track state | 
|  | use constant STATE_PCPU_PAGES_DRAINED	=> 8; | 
|  | use constant STATE_PCPU_PAGES_REFILLED	=> 9; | 
|  |  | 
|  | # High-level events extrapolated from tracepoints | 
|  | use constant HIGH_PCPU_DRAINS		=> 10; | 
|  | use constant HIGH_PCPU_REFILLS		=> 11; | 
|  | use constant HIGH_EXT_FRAGMENT		=> 12; | 
|  | use constant HIGH_EXT_FRAGMENT_SEVERE	=> 13; | 
|  | use constant HIGH_EXT_FRAGMENT_MODERATE	=> 14; | 
|  | use constant HIGH_EXT_FRAGMENT_CHANGED	=> 15; | 
|  |  | 
|  | my %perprocesspid; | 
|  | my %perprocess; | 
|  | my $opt_ignorepid; | 
|  | my $opt_read_procstat; | 
|  | my $opt_prepend_parent; | 
|  |  | 
|  | # Catch sigint and exit on request | 
|  | my $sigint_report = 0; | 
|  | my $sigint_exit = 0; | 
|  | my $sigint_pending = 0; | 
|  | my $sigint_received = 0; | 
|  | sub sigint_handler { | 
|  | my $current_time = time; | 
|  | if ($current_time - 2 > $sigint_received) { | 
|  | print "SIGINT received, report pending. Hit ctrl-c again to exit\n"; | 
|  | $sigint_report = 1; | 
|  | } else { | 
|  | if (!$sigint_exit) { | 
|  | print "Second SIGINT received quickly, exiting\n"; | 
|  | } | 
|  | $sigint_exit++; | 
|  | } | 
|  |  | 
|  | if ($sigint_exit > 3) { | 
|  | print "Many SIGINTs received, exiting now without report\n"; | 
|  | exit; | 
|  | } | 
|  |  | 
|  | $sigint_received = $current_time; | 
|  | $sigint_pending = 1; | 
|  | } | 
|  | $SIG{INT} = "sigint_handler"; | 
|  |  | 
|  | # Parse command line options | 
|  | GetOptions( | 
|  | 'ignore-pid'	 =>	\$opt_ignorepid, | 
|  | 'read-procstat'	 =>	\$opt_read_procstat, | 
|  | 'prepend-parent' =>	\$opt_prepend_parent, | 
|  | ); | 
|  |  | 
|  | # Defaults for dynamically discovered regex's | 
|  | my $regex_fragdetails_default = 'page=([0-9a-f]*) pfn=([0-9]*) alloc_order=([-0-9]*) fallback_order=([-0-9]*) pageblock_order=([-0-9]*) alloc_migratetype=([-0-9]*) fallback_migratetype=([-0-9]*) fragmenting=([-0-9]) change_ownership=([-0-9])'; | 
|  |  | 
|  | # Dyanically discovered regex | 
|  | my $regex_fragdetails; | 
|  |  | 
|  | # Static regex used. Specified like this for readability and for use with /o | 
|  | #                      (process_pid)     (cpus      )   ( time  )   (tpoint    ) (details) | 
|  | my $regex_traceevent = '\s*([a-zA-Z0-9-]*)\s*(\[[0-9]*\])\s*([0-9.]*):\s*([a-zA-Z_]*):\s*(.*)'; | 
|  | my $regex_statname = '[-0-9]*\s\((.*)\).*'; | 
|  | my $regex_statppid = '[-0-9]*\s\(.*\)\s[A-Za-z]\s([0-9]*).*'; | 
|  |  | 
|  | sub generate_traceevent_regex { | 
|  | my $event = shift; | 
|  | my $default = shift; | 
|  | my $regex; | 
|  |  | 
|  | # Read the event format or use the default | 
|  | if (!open (FORMAT, "/sys/kernel/debug/tracing/events/$event/format")) { | 
|  | $regex = $default; | 
|  | } else { | 
|  | my $line; | 
|  | while (!eof(FORMAT)) { | 
|  | $line = <FORMAT>; | 
|  | if ($line =~ /^print fmt:\s"(.*)",.*/) { | 
|  | $regex = $1; | 
|  | $regex =~ s/%p/\([0-9a-f]*\)/g; | 
|  | $regex =~ s/%d/\([-0-9]*\)/g; | 
|  | $regex =~ s/%lu/\([0-9]*\)/g; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | # Verify fields are in the right order | 
|  | my $tuple; | 
|  | foreach $tuple (split /\s/, $regex) { | 
|  | my ($key, $value) = split(/=/, $tuple); | 
|  | my $expected = shift; | 
|  | if ($key ne $expected) { | 
|  | print("WARNING: Format not as expected '$key' != '$expected'"); | 
|  | $regex =~ s/$key=\((.*)\)/$key=$1/; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (defined shift) { | 
|  | die("Fewer fields than expected in format"); | 
|  | } | 
|  |  | 
|  | return $regex; | 
|  | } | 
|  | $regex_fragdetails = generate_traceevent_regex("kmem/mm_page_alloc_extfrag", | 
|  | $regex_fragdetails_default, | 
|  | "page", "pfn", | 
|  | "alloc_order", "fallback_order", "pageblock_order", | 
|  | "alloc_migratetype", "fallback_migratetype", | 
|  | "fragmenting", "change_ownership"); | 
|  |  | 
|  | sub read_statline($) { | 
|  | my $pid = $_[0]; | 
|  | my $statline; | 
|  |  | 
|  | if (open(STAT, "/proc/$pid/stat")) { | 
|  | $statline = <STAT>; | 
|  | close(STAT); | 
|  | } | 
|  |  | 
|  | if ($statline eq '') { | 
|  | $statline = "-1 (UNKNOWN_PROCESS_NAME) R 0"; | 
|  | } | 
|  |  | 
|  | return $statline; | 
|  | } | 
|  |  | 
|  | sub guess_process_pid($$) { | 
|  | my $pid = $_[0]; | 
|  | my $statline = $_[1]; | 
|  |  | 
|  | if ($pid == 0) { | 
|  | return "swapper-0"; | 
|  | } | 
|  |  | 
|  | if ($statline !~ /$regex_statname/o) { | 
|  | die("Failed to math stat line for process name :: $statline"); | 
|  | } | 
|  | return "$1-$pid"; | 
|  | } | 
|  |  | 
|  | sub parent_info($$) { | 
|  | my $pid = $_[0]; | 
|  | my $statline = $_[1]; | 
|  | my $ppid; | 
|  |  | 
|  | if ($pid == 0) { | 
|  | return "NOPARENT-0"; | 
|  | } | 
|  |  | 
|  | if ($statline !~ /$regex_statppid/o) { | 
|  | die("Failed to match stat line process ppid:: $statline"); | 
|  | } | 
|  |  | 
|  | # Read the ppid stat line | 
|  | $ppid = $1; | 
|  | return guess_process_pid($ppid, read_statline($ppid)); | 
|  | } | 
|  |  | 
|  | sub process_events { | 
|  | my $traceevent; | 
|  | my $process_pid; | 
|  | my $cpus; | 
|  | my $timestamp; | 
|  | my $tracepoint; | 
|  | my $details; | 
|  | my $statline; | 
|  |  | 
|  | # Read each line of the event log | 
|  | EVENT_PROCESS: | 
|  | while ($traceevent = <STDIN>) { | 
|  | if ($traceevent =~ /$regex_traceevent/o) { | 
|  | $process_pid = $1; | 
|  | $tracepoint = $4; | 
|  |  | 
|  | if ($opt_read_procstat || $opt_prepend_parent) { | 
|  | $process_pid =~ /(.*)-([0-9]*)$/; | 
|  | my $process = $1; | 
|  | my $pid = $2; | 
|  |  | 
|  | $statline = read_statline($pid); | 
|  |  | 
|  | if ($opt_read_procstat && $process eq '') { | 
|  | $process_pid = guess_process_pid($pid, $statline); | 
|  | } | 
|  |  | 
|  | if ($opt_prepend_parent) { | 
|  | $process_pid = parent_info($pid, $statline) . " :: $process_pid"; | 
|  | } | 
|  | } | 
|  |  | 
|  | # Unnecessary in this script. Uncomment if required | 
|  | # $cpus = $2; | 
|  | # $timestamp = $3; | 
|  | } else { | 
|  | next; | 
|  | } | 
|  |  | 
|  | # Perl Switch() sucks majorly | 
|  | if ($tracepoint eq "mm_page_alloc") { | 
|  | $perprocesspid{$process_pid}->{MM_PAGE_ALLOC}++; | 
|  | } elsif ($tracepoint eq "mm_page_free") { | 
|  | $perprocesspid{$process_pid}->{MM_PAGE_FREE}++ | 
|  | } elsif ($tracepoint eq "mm_page_free_batched") { | 
|  | $perprocesspid{$process_pid}->{MM_PAGE_FREE_BATCHED}++; | 
|  | } elsif ($tracepoint eq "mm_page_pcpu_drain") { | 
|  | $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}++; | 
|  | $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED}++; | 
|  | } elsif ($tracepoint eq "mm_page_alloc_zone_locked") { | 
|  | $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}++; | 
|  | $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED}++; | 
|  | } elsif ($tracepoint eq "mm_page_alloc_extfrag") { | 
|  |  | 
|  | # Extract the details of the event now | 
|  | $details = $5; | 
|  |  | 
|  | my ($page, $pfn); | 
|  | my ($alloc_order, $fallback_order, $pageblock_order); | 
|  | my ($alloc_migratetype, $fallback_migratetype); | 
|  | my ($fragmenting, $change_ownership); | 
|  |  | 
|  | if ($details !~ /$regex_fragdetails/o) { | 
|  | print "WARNING: Failed to parse mm_page_alloc_extfrag as expected\n"; | 
|  | next; | 
|  | } | 
|  |  | 
|  | $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG}++; | 
|  | $page = $1; | 
|  | $pfn = $2; | 
|  | $alloc_order = $3; | 
|  | $fallback_order = $4; | 
|  | $pageblock_order = $5; | 
|  | $alloc_migratetype = $6; | 
|  | $fallback_migratetype = $7; | 
|  | $fragmenting = $8; | 
|  | $change_ownership = $9; | 
|  |  | 
|  | if ($fragmenting) { | 
|  | $perprocesspid{$process_pid}->{HIGH_EXT_FRAG}++; | 
|  | if ($fallback_order <= 3) { | 
|  | $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE}++; | 
|  | } else { | 
|  | $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE}++; | 
|  | } | 
|  | } | 
|  | if ($change_ownership) { | 
|  | $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED}++; | 
|  | } | 
|  | } else { | 
|  | $perprocesspid{$process_pid}->{EVENT_UNKNOWN}++; | 
|  | } | 
|  |  | 
|  | # Catch a full pcpu drain event | 
|  | if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} && | 
|  | $tracepoint ne "mm_page_pcpu_drain") { | 
|  |  | 
|  | $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS}++; | 
|  | $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0; | 
|  | } | 
|  |  | 
|  | # Catch a full pcpu refill event | 
|  | if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} && | 
|  | $tracepoint ne "mm_page_alloc_zone_locked") { | 
|  | $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS}++; | 
|  | $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0; | 
|  | } | 
|  |  | 
|  | if ($sigint_pending) { | 
|  | last EVENT_PROCESS; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | sub dump_stats { | 
|  | my $hashref = shift; | 
|  | my %stats = %$hashref; | 
|  |  | 
|  | # Dump per-process stats | 
|  | my $process_pid; | 
|  | my $max_strlen = 0; | 
|  |  | 
|  | # Get the maximum process name | 
|  | foreach $process_pid (keys %perprocesspid) { | 
|  | my $len = length($process_pid); | 
|  | if ($len > $max_strlen) { | 
|  | $max_strlen = $len; | 
|  | } | 
|  | } | 
|  | $max_strlen += 2; | 
|  |  | 
|  | printf("\n"); | 
|  | printf("%-" . $max_strlen . "s %8s %10s   %8s %8s   %8s %8s %8s   %8s %8s %8s %8s %8s %8s\n", | 
|  | "Process", "Pages",  "Pages",      "Pages", "Pages", "PCPU",  "PCPU",   "PCPU",    "Fragment",  "Fragment", "MigType", "Fragment", "Fragment", "Unknown"); | 
|  | printf("%-" . $max_strlen . "s %8s %10s   %8s %8s   %8s %8s %8s   %8s %8s %8s %8s %8s %8s\n", | 
|  | "details", "allocd", "allocd",     "freed", "freed", "pages", "drains", "refills", "Fallback", "Causing",   "Changed", "Severe", "Moderate", ""); | 
|  |  | 
|  | printf("%-" . $max_strlen . "s %8s %10s   %8s %8s   %8s %8s %8s   %8s %8s %8s %8s %8s %8s\n", | 
|  | "",        "",       "under lock", "direct", "pagevec", "drain", "", "", "", "", "", "", "", ""); | 
|  |  | 
|  | foreach $process_pid (keys %stats) { | 
|  | # Dump final aggregates | 
|  | if ($stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED}) { | 
|  | $stats{$process_pid}->{HIGH_PCPU_DRAINS}++; | 
|  | $stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0; | 
|  | } | 
|  | if ($stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED}) { | 
|  | $stats{$process_pid}->{HIGH_PCPU_REFILLS}++; | 
|  | $stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0; | 
|  | } | 
|  |  | 
|  | printf("%-" . $max_strlen . "s %8d %10d   %8d %8d   %8d %8d %8d   %8d %8d %8d %8d %8d %8d\n", | 
|  | $process_pid, | 
|  | $stats{$process_pid}->{MM_PAGE_ALLOC}, | 
|  | $stats{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}, | 
|  | $stats{$process_pid}->{MM_PAGE_FREE}, | 
|  | $stats{$process_pid}->{MM_PAGE_FREE_BATCHED}, | 
|  | $stats{$process_pid}->{MM_PAGE_PCPU_DRAIN}, | 
|  | $stats{$process_pid}->{HIGH_PCPU_DRAINS}, | 
|  | $stats{$process_pid}->{HIGH_PCPU_REFILLS}, | 
|  | $stats{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG}, | 
|  | $stats{$process_pid}->{HIGH_EXT_FRAG}, | 
|  | $stats{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED}, | 
|  | $stats{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE}, | 
|  | $stats{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE}, | 
|  | $stats{$process_pid}->{EVENT_UNKNOWN}); | 
|  | } | 
|  | } | 
|  |  | 
|  | sub aggregate_perprocesspid() { | 
|  | my $process_pid; | 
|  | my $process; | 
|  | undef %perprocess; | 
|  |  | 
|  | foreach $process_pid (keys %perprocesspid) { | 
|  | $process = $process_pid; | 
|  | $process =~ s/-([0-9])*$//; | 
|  | if ($process eq '') { | 
|  | $process = "NO_PROCESS_NAME"; | 
|  | } | 
|  |  | 
|  | $perprocess{$process}->{MM_PAGE_ALLOC} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC}; | 
|  | $perprocess{$process}->{MM_PAGE_ALLOC_ZONE_LOCKED} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}; | 
|  | $perprocess{$process}->{MM_PAGE_FREE} += $perprocesspid{$process_pid}->{MM_PAGE_FREE}; | 
|  | $perprocess{$process}->{MM_PAGE_FREE_BATCHED} += $perprocesspid{$process_pid}->{MM_PAGE_FREE_BATCHED}; | 
|  | $perprocess{$process}->{MM_PAGE_PCPU_DRAIN} += $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}; | 
|  | $perprocess{$process}->{HIGH_PCPU_DRAINS} += $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS}; | 
|  | $perprocess{$process}->{HIGH_PCPU_REFILLS} += $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS}; | 
|  | $perprocess{$process}->{MM_PAGE_ALLOC_EXTFRAG} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG}; | 
|  | $perprocess{$process}->{HIGH_EXT_FRAG} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAG}; | 
|  | $perprocess{$process}->{HIGH_EXT_FRAGMENT_CHANGED} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED}; | 
|  | $perprocess{$process}->{HIGH_EXT_FRAGMENT_SEVERE} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE}; | 
|  | $perprocess{$process}->{HIGH_EXT_FRAGMENT_MODERATE} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE}; | 
|  | $perprocess{$process}->{EVENT_UNKNOWN} += $perprocesspid{$process_pid}->{EVENT_UNKNOWN}; | 
|  | } | 
|  | } | 
|  |  | 
|  | sub report() { | 
|  | if (!$opt_ignorepid) { | 
|  | dump_stats(\%perprocesspid); | 
|  | } else { | 
|  | aggregate_perprocesspid(); | 
|  | dump_stats(\%perprocess); | 
|  | } | 
|  | } | 
|  |  | 
|  | # Process events or signals until neither is available | 
|  | sub signal_loop() { | 
|  | my $sigint_processed; | 
|  | do { | 
|  | $sigint_processed = 0; | 
|  | process_events(); | 
|  |  | 
|  | # Handle pending signals if any | 
|  | if ($sigint_pending) { | 
|  | my $current_time = time; | 
|  |  | 
|  | if ($sigint_exit) { | 
|  | print "Received exit signal\n"; | 
|  | $sigint_pending = 0; | 
|  | } | 
|  | if ($sigint_report) { | 
|  | if ($current_time >= $sigint_received + 2) { | 
|  | report(); | 
|  | $sigint_report = 0; | 
|  | $sigint_pending = 0; | 
|  | $sigint_processed = 1; | 
|  | } | 
|  | } | 
|  | } | 
|  | } while ($sigint_pending || $sigint_processed); | 
|  | } | 
|  |  | 
|  | signal_loop(); | 
|  | report(); |