diff --git a/examples/perf-stat-hist_example.txt b/examples/perf-stat-hist_example.txt index b5a7c60..a231e9c 100644 --- a/examples/perf-stat-hist_example.txt +++ b/examples/perf-stat-hist_example.txt @@ -73,3 +73,50 @@ Tracing syscalls:sys_exit_read, power-of-4, max 1048576, for 10 seconds... 1048576 -> : 0 | | Most of our read()s were tiny, between 1 and 3 bytes. + + +Using power-of-2, and a max of 1024: + +# ./perf-stat-hist -P 2 -m 1024 syscalls:sys_exit_read ret +Tracing syscalls:sys_exit_read, power-of-2, max 1024, until Ctrl-C... +^C + Range : Count Distribution + -> -1 : 29 |## | + 0 -> 0 : 1 |# | + 1 -> 1 : 959 |######################################| + 2 -> 3 : 1 |# | + 4 -> 7 : 0 | | + 8 -> 15 : 2 |# | + 16 -> 31 : 14 |# | + 32 -> 63 : 1 |# | + 64 -> 127 : 0 | | + 128 -> 255 : 0 | | + 256 -> 511 : 0 | | + 512 -> 1023 : 1 |# | + 1024 -> : 1 |# | + + +Specifying custom bucket sizes: + +# ./perf-stat-hist -b "10 50 100 5000" syscalls:sys_exit_read ret +Tracing syscalls:sys_exit_read, specified buckets, until Ctrl-C... +^C + Range : Count Distribution + -> 9 : 989 |######################################| + 10 -> 49 : 5 |# | + 50 -> 99 : 0 | | + 100 -> 4999 : 2 |# | + 5000 -> : 0 | | + + +Specifying a single value to bifurcate statistics: + +# ./perf-stat-hist -b 10 syscalls:sys_exit_read ret +Tracing syscalls:sys_exit_read, specified buckets, until Ctrl-C... +^C + Range : Count Distribution + -> 9 : 2959 |######################################| + 10 -> : 7 |# | + +This has the lowest overhead for collection, since only two tracepoint +filter pairs are used. diff --git a/man/man8/kprobe.8 b/man/man8/kprobe.8 index 4811a3b..6a331e1 100644 --- a/man/man8/kprobe.8 +++ b/man/man8/kprobe.8 @@ -75,7 +75,7 @@ Trace do_sys_open() with filename string: .TP Trace do_sys_open() for filenames ending in "stat" # -.B kprobe 'p:myopen do_sys_open fn=+0(%si):string' 'fn ~ "*stat"' +.B kprobe 'p:myopen do_sys_open fn=+0(%si):string' 'fn ~ """*stat"""' .SH FIELDS The output format depends on the kernel version, and headings can be printed using \-H. The format is the same as the ftrace function trace format, described diff --git a/man/man8/perf-stat-hist.8 b/man/man8/perf-stat-hist.8 index 8be8016..8baa2a6 100644 --- a/man/man8/perf-stat-hist.8 +++ b/man/man8/perf-stat-hist.8 @@ -3,7 +3,7 @@ perf-stat-hist \- histogram summary of tracepoint values. Uses Linux perf_events. .SH SYNOPSIS .B perf-stat-hist -tracepoint variable [seconds] +[-h] [-b buckets|-P power] [-m max] tracepoint variable [seconds] .SH DESCRIPTION This is a proof-of-concept showing in-kernel histograms using Linux perf_events (aka the "perf" command), on older kernels where perf_events does not have @@ -22,6 +22,24 @@ Linux perf_events: add linux-tools-common, run "perf", then add any additional packages it requests. Also uses awk. .SH OPTIONS .TP +\-h +Usage message. +.TP +\-b buckets +Specify a list of bucket points for the histogram as a string (eg, "10 500 +1000"). The histogram will include buckets for less-than the minimum, and +greater-than-or-equal-to the maximum. If a single value is specified, two +statistics only are gathered: for less-than and for greater-than-or-equal-to. +The overhead is relative to the number of buckets, so only specifying a +single value costs the lowest overhead. +.TP +\-P power +Power for power-of histogram. By default, a power-of-4 histogram is created. +This and the \-b option are exclusive. +.TP +\-m max +Max value for power-of histograms. +.TP tracepoint Tracepoint specification. Eg, syscalls:sys_enter_read. .TP @@ -35,19 +53,31 @@ Number of seconds to trace. If not specified, this runs until Ctrl-C. .TP Trace read() syscalls until Ctrl-C, and show histogram of requested size: # -.B perf-stat-hist syscalls:sys_enter_read count +.B perf\-stat\-hist syscalls:sys_enter_read count .TP Trace read() syscall completions until Ctrl-C, and show histogram of successful returned size: # -.B perf-stat-hist syscalls:sys_exit_read count +.B perf\-stat\-hist syscalls:sys_exit_read ret .TP Trace read() return sizes for 10 seconds, showing histogram: # -.B perf-stat-hist syscalls:sys_exit_read count 10 +.B perf\-stat\-hist syscalls:sys_exit_read ret 10 .TP Trace network transmits until Ctrl-C, and show histogram of packet size: # -.B perf-stat-hist net:net_dev_xmit len +.B perf\-stat\-hist net:net_dev_xmit len +.TP +Trace read() return sizes, using a power-of-10 histogram: +.B perf\-stat\-hist \-P 10 syscalls:sys_exit_read ret +.TP +Trace read() return sizes, using a power-of-2 histogram, and a max of 1024: +.B perf\-stat\-hist \-P 2 \-m 1024 syscalls:sys_exit_read ret +.TP +Trace read() return sizes, using the specified bucket points: +.B perf\-stat\-hist \-b """10 50 100 5000""" syscalls:sys_exit_read ret +.TP +Trace read() return sizes, and bifurcate statistics by the value 10: +.B perf-stat-hist \-b 10 syscalls:sys_exit_read ret .SH FIELDS .TP Range @@ -61,7 +91,9 @@ ASCII histogram representation of the Count column. .SH OVERHEAD While the counts are performed in-kernel, there is one tracepoint used per histogram bucket, so the overheads are higher than usual (relative to the -number of buckets) than function counting using perf stat. +number of buckets) than function counting using perf stat. The lowest +overhead is when \-b is used to specify one bucket only, bifurcating +statistics. .SH SOURCE This is from the perf-tools collection. .IP diff --git a/misc/perf-stat-hist b/misc/perf-stat-hist index 8b77122..6947324 100755 --- a/misc/perf-stat-hist +++ b/misc/perf-stat-hist @@ -5,7 +5,8 @@ # This is a proof-of-concept showing in-kernel histograms using Linux # perf_events (aka the "perf" command). # -# USAGE: ./perf-stat-hist tracepoint variable [seconds] +# USAGE: perf-stat-hist [-h] [-b buckets|-P power] [-m max] tracepoint +# variable [seconds] # eg, # ./perf-stat-hist syscalls:sys_enter_read count 10 # @@ -38,50 +39,117 @@ # # 30-Jun-2014 Brendan Gregg Created this. -if (( $# < 2 )); then - echo "USAGE: $0 tracepoint variable [seconds]" - echo " eg," - echo " $0 syscalls:sys_enter_read count 10" - exit -fi +opt_buckets=0; opt_power=0; power=4; opt_max=0; max=$((1024 * 1024)) +duration=0; debug=0 +trap ':' INT QUIT TERM PIPE +function usage { + cat <<-END >&2 + USAGE: perf-stat-hist [-h] [-b buckets|-P power] [-m max] tracepoint + variable [seconds] + -b buckets # specify histogram bucket points + -P power # power-of (default is 4) + -m max # max value for power-of + -h # this usage message + eg, + perf-stat-hist syscalls:sys_enter_read count 5 + # read() request histogram, 5 seconds + perf-stat-hist syscalls:sys_exit_read ret 5 + # read() return histogram, 5 seconds + perf-stat-hist -P 10 syscalls:sys_exit_read ret 5 + # ... use power-of-10 + perf-stat-hist -P 2 -m 1024 syscalls:sys_exit_read ret 5 + # ... use power-of-2, max 1024 + perf-stat-hist -b "10 50 100 500" syscalls:sys_exit_read ret 5 + # ... histogram based on these bucket ranges + perf-stat-hist -b 10 syscalls:sys_exit_read ret 5 + # ... bifurcate by the value 10 (lowest overhead) +END + exit +} + +function die { + echo >&2 "$@" + exit 1 +} + +### process options +while getopts b:hm:P: opt +do + case $opt in + b) opt_buckets=1; buckets=($OPTARG) ;; + P) opt_power=1; power=$OPTARG ;; + m) opt_max=1; max=$OPTARG ;; + h|?) usage ;; + esac +done +shift $(( $OPTIND - 1 )) +(( $# < 2 )) && usage tpoint=$1 # tracepoint var=$2 # variable for histogram -duration=${3:-5} +duration=${3} -pow=4 # power-of for bucket size -max=$(( 1024 * 1024 )) # intended max bucket value -debug=0 +### option logic +(( opt_buckets && opt_power )) && die "ERROR: use either -b or -P" +(( opt_power && power < 2 )) && die "ERROR: -P power must be 2 or higher" ### check that tracepoint exists -if ! grep "^$tpoint\$" /sys/kernel/debug/tracing/available_events >/dev/null +if ! grep "^$tpoint\$" /sys/kernel/debug/tracing/available_events > /dev/null then echo >&2 "ERROR: tracepoint \"$tpoint\" not found. Exiting..." [[ "$USER" != "root" ]] && echo >&2 "Not root user?" - exit + exit 1 +fi + +### auto build power-of buckets +if (( !opt_buckets )); then + b=0 + s=1; + while (( s <= max )); do + b="$b $s" + (( s *= power )) + done + buckets=($b) fi ### build list of tracepoints and filters for each histogram bucket -tpoints=$( - echo ' -e' $tpoint --filter '"'$var' == 0"' - s=1; - while (( s < max )); do - echo ' -e' $tpoint --filter '"'$var' >= '$s' && '\ - $var' < '$((s * pow))'"' - (( s *= pow )) - done - echo ' -e' $tpoint --filter '"'$var' >= '$s'"' -) +max=${buckets[${#buckets[@]} - 1]} # last element +((max_i = ${#buckets[*]} - 1)) +tpoints=" -e $tpoint --filter \"$var < ${buckets[0]}\"" +awkarray= +i=0; +while (( i < max_i )); do + tpoints="$tpoints -e $tpoint --filter \"$var >= ${buckets[$i]} && " + tpoints="$tpoints $var < ${buckets[$i + 1]}\"" + awkarray="$awkarray buckets[$i]=${buckets[$i]};" + (( i++ )) +done +awkarray="$awkarray buckets[$max_i]=${buckets[$max_i]};" +tpoints="$tpoints -e $tpoint --filter \"$var >= ${buckets[$max_i]}\"" if (( debug )); then + echo buckets: ${buckets[*]} echo tracepoints: $tpoints + echo awkarray: ${awkarray[*]} fi -echo "Tracing $tpoint, power-of-$pow, max $max, for $duration seconds..." +### prepare to run +if (( duration )); then + etext="for $duration seconds" + cmd="sleep $duration" +else + etext="until Ctrl-C" + cmd="sleep 999999" +fi +if (( opt_buckets )); then + echo "Tracing $tpoint, specified buckets, $etext..." +else + echo "Tracing $tpoint, power-of-$power, max $max, $etext..." +fi ### run perf out="-o /dev/stdout" # a workaround needed in linux 3.2; not by 3.4.15 -stat=$(eval perf stat $tpoints -a $out sleep $duration 2>&1) +stat=$(eval perf stat $tpoints -a $out $cmd 2>&1) if (( $? != 0 )); then echo >&2 "ERROR running perf:" @@ -103,35 +171,38 @@ most=$(echo "$stat" | awk -v tpoint=$tpoint ' ### process output echo -echo "$stat" | awk -v tpoint=$tpoint -v max=$max -v pow=$pow -v most=$most ' +echo "$stat" | awk -v tpoint=$tpoint -v max_i=$max_i -v most=$most ' function star(sval, smax, swidth) { stars = "" if (smax == 0) return "" - for (i = 0; i < (swidth * sval / smax); i++) { + for (si = 0; si < (swidth * sval / smax); si++) { stars = stars "#" } return stars } BEGIN { + '"$awkarray"' printf(" %-15s: %-8s %s\n", "Range", "Count", "Distribution") } - /Performance counter stats/ { s = 0 } + /Performance counter stats/ { i = -1 } # reverse order of rule set is important { ok = 0 } $2 == tpoint { num = $1; gsub(/,/, "", num); ok = 1 } - ok && s > 0 && s >= max { - printf(" %10d -> %-10s: %-8s |%-38s|\n", s, "", num, - star(num, most, 38)) + ok && i >= max_i { + printf(" %10d -> %-10s: %-8s |%-38s|\n", buckets[i], + "", num, star(num, most, 38)) + next } - ok && s > 0 && s < max { - printf(" %10d -> %-10d: %-8s |%-38s|\n", s, s * pow - 1, num, - star(num, most, 38)) - s *= pow + ok && i >= 0 && i < max_i { + printf(" %10d -> %-10d: %-8s |%-38s|\n", buckets[i], + buckets[i+1] - 1, num, star(num, most, 38)) + i++ + next } - ok && s == 0 { - printf(" %10d %-10s: %-8s |%-38s|\n", 0, "", num, - star(num, most, 38)) - s = 1 + ok && i == -1 { + printf(" %10s -> %-10d: %-8s |%-38s|\n", "", + buckets[0] - 1, num, star(num, most, 38)) + i++ } '