diff options
-rw-r--r-- | Makefile | 5 | ||||
-rwxr-xr-x | avstack.pl | 271 | ||||
-rwxr-xr-x | script/static-stack-analyze.pl | 303 |
3 files changed, 306 insertions, 273 deletions
@@ -139,12 +139,12 @@ endif stack: default ${QUIET}test -n "${OBJDUMP}" ${QUIET}test -n "${ARCH_SHORTNAME}" - ${QUIET}./static-stack-analyze.pl ${OBJDUMP} ${ARCH_SHORTNAME} ${OBJECTS} + ${QUIET}script/static-stack-analyze.pl ${OBJDUMP} ${ARCH_SHORTNAME} ${OBJECTS} stackm: default ${QUIET}test -n "${OBJDUMP}" ${QUIET}test -n "${ARCH_SHORTNAME}" - ${QUIET}./static-stack-analyze.pl --machine-readable ${OBJDUMP} ${ARCH_SHORTNAME} ${OBJECTS} + ${QUIET}script/static-stack-analyze.pl --machine-readable ${OBJDUMP} ${ARCH_SHORTNAME} ${OBJECTS} clean: arch_clean rm -f build/system.elf @@ -166,6 +166,7 @@ help: arch_help @echo " kout_nop -- Do not write output to stdout / serial console" @echo " ostream -- include C++ ostream standard library" @echo " trace_malloc -- trace mpmalloc/mpcalloc/mprealloc calls on stdout" + @echo " stack_usage -- Generate .su files for stack usage estimation (-> make stack)" @echo @echo "${arch} drivers:" @ls -1 src/arch/${arch}/driver | fgrep .c | cut -d . -f 1 | sed 's/^/ /' diff --git a/avstack.pl b/avstack.pl deleted file mode 100755 index 11328e7..0000000 --- a/avstack.pl +++ /dev/null @@ -1,271 +0,0 @@ -#!/usr/bin/perl -w -# avstack.pl: AVR stack checker -# Copyright (C) 2013 Daniel Beer <dlbeer@gmail.com> -# Copyright (C) 2018 Daniel Friesel <daniel.friesel@uni-osnabrueck.de> -# -# Permission to use, copy, modify, and/or distribute this software for -# any purpose with or without fee is hereby granted, provided that the -# above copyright notice and this permission notice appear in all -# copies. -# -# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL -# WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE -# AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL -# DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR -# PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER -# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -# PERFORMANCE OF THIS SOFTWARE. -# -# Usage -# ----- -# -# This script requires that you compile your code with -fstack-usage. -# This results in GCC/G++ generating a .su file for each .o file. Once you -# have these, do: -# -# ./avstack.pl <objdump binary> <call cost> <object files ...> -# -# This will disassemble .o files to construct a call graph, and read -# frame size information from .su. The call graph is traced to find, for -# each function: -# -# - Call height: the maximum call height of any callee, plus 1 -# (defined to be 1 for any function which has no callees). -# -# - Inherited frame: the maximum *inherited* frame of any callee, plus -# the GCC-calculated frame size of the function in question. -# -# Using these two pieces of information, we calculate a cost (estimated -# peak stack usage) for calling the function. Functions are then listed -# on stdout in decreasing order of cost. -# -# Functions which are recursive are marked with an 'R' to the left of -# them. Their cost is calculated for a single level of recursion. -# -# The peak stack usage of your entire program can usually be estimated -# as the stack cost of "main", plus the maximum stack cost of any -# interrupt handler which might execute. - -use strict; -use warnings; -use 5.010; - -# Configuration: set these as appropriate for your architecture/project. - -my $objdump = shift; -my $call_cost = shift; - -# First, we need to read all object and corresponding .su files. We're -# gathering a mapping of functions to callees and functions to frame -# sizes. We're just parsing at this stage -- callee name resolution -# comes later. - -my %frame_size; # "func@file" -> size -my %call_graph; # "func@file" -> {callees} -my %addresses; # "addr@file" -> "func@file" - -my %global_name; # "func" -> "func@file" -my %ambiguous; # "func" -> 1 - -foreach (@ARGV) { - - # Disassemble this object file to obtain a callees. Sources in the - # call graph are named "func@file". Targets in the call graph are - # named either "offset@file" or "funcname". We also keep a list of - # the addresses and names of each function we encounter. - my $objfile = $_; - my $source; - - open( DISASSEMBLY, "$objdump -Cdr $objfile|" ) - || die "Can't disassemble $objfile"; - while (<DISASSEMBLY>) { - chomp; - - if (/^([0-9a-fA-F]+) <(.*)>:/) { - my $a = $1; - my $name = $2; - - $source = "$name\@$objfile"; - $call_graph{$source} = {}; - $ambiguous{$name} = 1 if defined( $global_name{$name} ); - $global_name{$name} = "$name\@$objfile"; - - $a =~ s/^0*//; - $addresses{"$a\@$objfile"} = "$name\@$objfile"; - } - - if (/: R_[A-Za-z0-9_]+_(?:CALL|PLT32|ABS16)[ \t]+(.*)/) { - my $t = $1; - - if ( $t eq ".text" ) { - $t = "\@$objfile"; - } - elsif ( $t =~ /^\.text\+0x(.*)$/ ) { - $t = "$1\@$objfile"; - } - - $t =~ s{-0x4$}{}; - - $call_graph{$source}->{$t} = 1; - } - } - close(DISASSEMBLY); - - # Extract frame sizes from the corresponding .su file. - if ( $objfile =~ /^(.*).o$/ ) { - my $sufile = "$1.su"; - - open( SUFILE, "<$sufile" ) || die "Can't open $sufile"; - while (<SUFILE>) { - if ( - m{ ^ [^:]+ : \d+ : \d+ : [^\t]+? \s (?<name> \S+ ) \( .*? \) - (?: \s* \[with [^]]*\])? - \t - (?<size> \d+ )}x - ) - { - $frame_size{"$+{name}\@$objfile"} = $+{size} + $call_cost; - } - else { - say "No match $_"; - } - } - close(SUFILE); - } -} - -# In this step, we enumerate each list of callees in the call graph and -# try to resolve the symbols. We omit ones we can't resolve, but keep a -# set of them anyway. - -my %unresolved; - -foreach ( keys %call_graph ) { - my $from = $_; - my $callees = $call_graph{$from}; - my %resolved; - - foreach ( keys %$callees ) { - my $t = $_; - - if ( defined( $addresses{$t} ) ) { - $resolved{ $addresses{$t} } = 1; - } - elsif ( defined( $global_name{$t} ) ) { - $resolved{ $global_name{$t} } = 1; - warn "Ambiguous resolution: $t" if defined( $ambiguous{$t} ); - } - elsif ( defined( $call_graph{$t} ) ) { - $resolved{$t} = 1; - } - else { - $unresolved{$t} = 1; - } - } - - $call_graph{$from} = \%resolved; -} - -# Create fake edges and nodes to account for dynamic behaviour. -$call_graph{"INTERRUPT"} = {}; - -foreach ( keys %call_graph ) { - $call_graph{"INTERRUPT"}->{$_} = 1 if /^__vector_/; -} - -# Trace the call graph and calculate, for each function: -# -# - inherited frames: maximum inherited frame of callees, plus own -# frame size. -# - height: maximum height of callees, plus one. -# - recursion: is the function called recursively (including indirect -# recursion)? - -my %has_caller; -my %visited; -my %total_cost; -my %call_depth; - -sub trace { - my $f = shift; - - if ( $visited{$f} ) { - $visited{$f} = "R" if $visited{$f} eq "?"; - return; - } - - $visited{$f} = "?"; - - my $max_depth = 0; - my $max_frame = 0; - - my $targets = $call_graph{$f} || die "Unknown function: $f"; - if ( defined($targets) ) { - foreach ( keys %$targets ) { - my $t = $_; - - $has_caller{$t} = 1; - trace($t); - - my $is = $total_cost{$t}; - my $d = $call_depth{$t}; - - $max_frame = $is if $is > $max_frame; - $max_depth = $d if $d > $max_depth; - } - } - - $call_depth{$f} = $max_depth + 1; - $total_cost{$f} = $max_frame + ( $frame_size{$f} || 0 ); - $visited{$f} = " " if $visited{$f} eq "?"; -} - -foreach ( keys %call_graph ) { trace $_; } - -# Now, print results in a nice table. -printf " %-30s %8s %8s %8s\n", "Func", "Cost", "Frame", "Height"; -print "------------------------------------"; -print "------------------------------------\n"; - -my $max_iv = 0; -my $main = 0; - -foreach ( sort { $total_cost{$b} <=> $total_cost{$a} } keys %visited ) { - my $name = $_; - - if (/^(.*)@(.*)$/) { - $name = $1 unless $ambiguous{$name}; - } - - my $tag = $visited{$_}; - my $cost = $total_cost{$_}; - - $name = $_ if $ambiguous{$name}; - $tag = ">" unless $has_caller{$_}; - - if (/^__vector_/) { - $max_iv = $cost if $cost > $max_iv; - } - elsif (/^main@/) { - $main = $cost; - } - - if ( $ambiguous{$name} ) { $name = $_; } - - printf "%s %-30s %8d %8d %8d\n", $tag, $name, $cost, - $frame_size{$_} || 0, $call_depth{$_}; -} - -print "\n"; - -print "Peak execution estimate (main + worst-case IV):\n"; -printf " main = %d, worst IV = %d, total = %d\n", - $total_cost{ $global_name{"main"} }, - $total_cost{"INTERRUPT"}, - $total_cost{ $global_name{"main"} } + $total_cost{"INTERRUPT"}; - -print "\n"; - -print "The following functions were not resolved:\n"; -foreach ( keys %unresolved ) { print " $_\n"; } diff --git a/script/static-stack-analyze.pl b/script/static-stack-analyze.pl new file mode 100755 index 0000000..3089d3d --- /dev/null +++ b/script/static-stack-analyze.pl @@ -0,0 +1,303 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use 5.010; + +use File::Slurp qw(read_file); +use Getopt::Long; +use List::Util qw(uniq); + +our $VERSION = '0.00'; +my $machine_readable = 0; + +GetOptions( + 'm|machine-readable!' => \$machine_readable, +); + +if (@ARGV < 3) { + die("Usage: $0 <objdump binary> <architecture> <oject files ...>\n"); +} + +my %arch_data = ( + x64 => { + # For each function call, the 8-Byte return address is pushed onto the stack. + call_cost => 8, + call_relocation => 'R_X86_64_PLT32', + }, + avr => { + # On each function call, the 2-Byte return address is pushed onto the stack. + # (unhandled exception: ATTiny2313 and other devices with <= 256 Bytes of RAM) + call_cost => 2, + call_relocation => 'R_AVR_CALL', + }, + msp430 => { + # For each function call, the 4-Byte (20 bits at 2-Byte alignment) return + # address is pushed onto the stack. + call_cost => 4, + call_relocation => 'R_MSP430X_ABS16', + }, + msp430large => { + # For each function call, the 4-Byte (20 bits at 2-Byte alignment) return + # address is pushed onto the stack. + call_cost => 4, + call_relocation => 'R_MSP430X_ABS20_ADR_DST', + }, +); + +my %addresses; +my %call_graph; +my %frame_size; + +my ($objdump_path, $arch, @object_files) = @ARGV; + +sub add_call { + my ($parent, $child) = @_; + + push(@{$call_graph{$parent}{children}}, $child); + push(@{$call_graph{$child}{parents}}, $parent); + + #say "$parent -> $child;"; + + if ($parent =~ m{ ^ __vector_ }x) { + push(@{$call_graph{INTERRUPT}{children}}, $parent); + push(@{$call_graph{$parent}{parents}}, 'INTERRUPT'); + } +} + +sub trace { + my ($function) = @_; + + my $node = $call_graph{$function}; + + if ($node->{visited}) { + $node->{recursive} = 1; + return; + } + + $node->{visited} = 1; + $node->{max_depth} //= 1; + $node->{max_cost} //= $frame_size{$function}{size} // 0; + + for my $child (@{$node->{children} // []}) { + trace($child); + my $child_node = $call_graph{$child}; + if ($child_node->{max_depth} + 1 > $node->{max_depth}) { + $node->{max_depth} = $child_node->{max_depth} + 1; + } + if (defined $frame_size{$function}{size}) { + my $call_cost = $child_node->{max_cost} + $arch_data{$arch}{call_cost} + $frame_size{$function}{size}; + if ($call_cost > $node->{max_cost}) { + $node->{max_cost} = $call_cost; + } + } + else { + say "Frame size of $function is unknown!"; + $node->{unknown_child} = 1; + my $call_cost = $child_node->{max_cost} + $arch_data{$arch}{call_cost}; + if ($call_cost > $node->{max_cost}) { + $node->{max_cost} = $call_cost; + } + } + if ($child_node->{unknown_child}) { + $node->{unknown_child} = 1; + } + } + + $node->{visited} = 0; +} + +sub check_recursive { + my ($function) = @_; + my $node = $call_graph{$function}; + + if ($node->{visited}) { + return; + } + + $node->{visited} = 1; + + for my $child (@{$node->{children} // []}) { + if ($call_graph{$child}{recursive} or $call_graph{$child}{calls_recursive}) { + $node->{calls_recursive} = 1; + } + } + + if ($node->{recursive} or $node->{calls_recursive}) { + for my $parent (@{$node->{parents} // []}) { + check_recursive($parent); + } + } + + $node->{visited} = 0; +} + +for my $file (@object_files) { + my $dump = qx{$objdump_path -Cdr $file}; + my $current_function = 'NONE'; + my $call_reloc = $arch_data{$arch}{call_relocation}; + + my %address; + + for my $line (split(qr{\n}, $dump)) { + if ($line =~ m{ ^ (?<address> [0-9a-fA-Z]+ ) \s+ < (?<function> .* ) > : $}x) { + $current_function = $+{function}; + my $addr = $+{address}; + + $current_function =~ s{ \s \[ clone \s \S+ \] $ }{}x; + $addr =~ s{ ^ 0* }{}x; + + $address{$addr} = $current_function; + } + elsif ($line =~ m{ : \s+ ${call_reloc} \s+ (?<function> .+ ) }x) { + my $function = $+{function}; + + if ($function =~ m{ ^ [.] L \d+ $ }x) { + # MSP430, doesn't seem related to function calls + next; + } + + # x64 fixup. TODO what's the meaning of this? Edge cases to consider? + $function =~ s{ -0x4 $ }{}x; + + if ($function =~ m{ ^ [.] text [+] 0x (?<addr> [0-9a-fA-F]+ ) }x) { + $function = $address{$+{addr}} // 'UNRESOLVED'; + } + add_call($current_function, $function); + } + } + + my $su_file = $file; + $su_file =~ s{[.]o$}{.su}; + + for my $line (read_file($su_file)) { + if ($line =~ m{ ^ (?<file> [^:]+ ) : (?<line> [^:]+ ) : (?<column> [^:]+ ) + : (?: const \s+ )? (?: virtual \s+ )? + (?<return_type> \S+ ) (?: \s (?<function> [^\t]+ ))? + \t (?<size> [^\t]+ ) \t (?<estimate> .+ ) $ }x) { + + my $function = $+{function} // $+{return_type}; + my $size = $+{size}; + my $estimate = $+{estimate}; + + $frame_size{$function} = { + size => $size, + estimate => $estimate, + }; + + # For C(-ish) functions, objdump does not give the function signature, + # whereas .su does. Therefore, for each demangled function with + # signature, we create a duplicate entry without it. + if ($function =~ s{ \( .* \) $ }{}x) { + $frame_size{$function} = { + size => $size, + estimate => $estimate, + duplicate => 1, + }; + } + + # There's also some disagreement about "short" vs. "short int" and + # "long" vs. "long int"... + } + else { + say "$su_file: Cannot parse $line"; + } + } +} + +for my $function (keys %call_graph) { + @{$call_graph{$function}{children}} = uniq @{$call_graph{$function}{children}}; + @{$call_graph{$function}{parents}} = uniq @{$call_graph{$function}{parents}}; +} + +for my $function (keys %call_graph) { + trace($function); +} + +for my $function (keys %call_graph) { + check_recursive($function); +} + +if (not $machine_readable) { + printf("%2s %40s %10s%1s %10s%1s %10s\n", q{}, 'Function', 'Inclusive', '', 'Exclusive', '', 'Height'); +} + +for my $function (reverse sort { ($call_graph{$a}{max_cost} <=> $call_graph{$b}{max_cost}) || ($a cmp $b) } keys %call_graph) { + + my $node = $call_graph{$function}; + my $info = ''; + + if ($node->{recursive}) { + $info .= 'R'; + } + elsif ($node->{calls_recursive}) { + $info .= 'r'; + } + else { + $info .= ' '; + } + + if (@{$node->{parents} // []} == 0) { + $info .= '>'; + } + if ($machine_readable) { + printf("%s!%d!%d!%d!%d!%d!%d!%d!%d\n", + $function, + $call_graph{$function}{max_cost}, + $frame_size{$function}{size} // 0, + $call_graph{$function}{max_depth}, + $node->{recursive} || 0, + $node->{calls_recursive} || 0, + $node->{parents} || 0, + $call_graph{$function}{unknown_child} ? 1 : 0, + defined $frame_size{$function}{size} ? 1 : 0, + ); + } + else { + printf("%2s %40s %10d%1s %10d%1s %10d\n", + $info, substr($function, 0, 40), + $call_graph{$function}{max_cost}, + $call_graph{$function}{unknown_child} ? '+' : ' ', + $frame_size{$function}{size} // 0, + defined $frame_size{$function}{size} ? ' ' : '?', + $call_graph{$function}{max_depth}); + } +} + +__END__ + +=head1 NAME + +=head1 SYNOPSIS + +=head1 VERSION + +=head1 DESCRIPTION + +=head1 OPTIONS + +=over + +=back + +=head1 EXIT STATUS + +=head1 CONFIGURATION + +None. + +=head1 DEPENDENCIES + +=over + +=back + +=head1 BUGS AND LIMITATIONS + +=head1 AUTHOR + +Copyright (C) 2018 by Daniel Friesel E<lt>derf@finalrewind.orgE<gt> + +=head1 LICENSE + + 0. You just DO WHAT THE FUCK YOU WANT TO. |