mirror of
https://github.com/brendangregg/perf-tools.git
synced 2025-11-30 23:16:03 +07:00
perf-stat-hist custom histograms
This commit is contained in:
@@ -73,3 +73,50 @@ Tracing syscalls:sys_exit_read, power-of-4, max 1048576, for 10 seconds...
|
||||
1048576 -> : 0 | |
|
||||
|
||||
Most of our read()s were tiny, between 1 and 3 bytes.
|
||||
|
||||
|
||||
Using power-of-2, and a max of 1024:
|
||||
|
||||
# ./perf-stat-hist -P 2 -m 1024 syscalls:sys_exit_read ret
|
||||
Tracing syscalls:sys_exit_read, power-of-2, max 1024, until Ctrl-C...
|
||||
^C
|
||||
Range : Count Distribution
|
||||
-> -1 : 29 |## |
|
||||
0 -> 0 : 1 |# |
|
||||
1 -> 1 : 959 |######################################|
|
||||
2 -> 3 : 1 |# |
|
||||
4 -> 7 : 0 | |
|
||||
8 -> 15 : 2 |# |
|
||||
16 -> 31 : 14 |# |
|
||||
32 -> 63 : 1 |# |
|
||||
64 -> 127 : 0 | |
|
||||
128 -> 255 : 0 | |
|
||||
256 -> 511 : 0 | |
|
||||
512 -> 1023 : 1 |# |
|
||||
1024 -> : 1 |# |
|
||||
|
||||
|
||||
Specifying custom bucket sizes:
|
||||
|
||||
# ./perf-stat-hist -b "10 50 100 5000" syscalls:sys_exit_read ret
|
||||
Tracing syscalls:sys_exit_read, specified buckets, until Ctrl-C...
|
||||
^C
|
||||
Range : Count Distribution
|
||||
-> 9 : 989 |######################################|
|
||||
10 -> 49 : 5 |# |
|
||||
50 -> 99 : 0 | |
|
||||
100 -> 4999 : 2 |# |
|
||||
5000 -> : 0 | |
|
||||
|
||||
|
||||
Specifying a single value to bifurcate statistics:
|
||||
|
||||
# ./perf-stat-hist -b 10 syscalls:sys_exit_read ret
|
||||
Tracing syscalls:sys_exit_read, specified buckets, until Ctrl-C...
|
||||
^C
|
||||
Range : Count Distribution
|
||||
-> 9 : 2959 |######################################|
|
||||
10 -> : 7 |# |
|
||||
|
||||
This has the lowest overhead for collection, since only two tracepoint
|
||||
filter pairs are used.
|
||||
|
||||
@@ -75,7 +75,7 @@ Trace do_sys_open() with filename string:
|
||||
.TP
|
||||
Trace do_sys_open() for filenames ending in "stat"
|
||||
#
|
||||
.B kprobe 'p:myopen do_sys_open fn=+0(%si):string' 'fn ~ "*stat"'
|
||||
.B kprobe 'p:myopen do_sys_open fn=+0(%si):string' 'fn ~ """*stat"""'
|
||||
.SH FIELDS
|
||||
The output format depends on the kernel version, and headings can be printed
|
||||
using \-H. The format is the same as the ftrace function trace format, described
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
perf-stat-hist \- histogram summary of tracepoint values. Uses Linux perf_events.
|
||||
.SH SYNOPSIS
|
||||
.B perf-stat-hist
|
||||
tracepoint variable [seconds]
|
||||
[-h] [-b buckets|-P power] [-m max] tracepoint variable [seconds]
|
||||
.SH DESCRIPTION
|
||||
This is a proof-of-concept showing in-kernel histograms using Linux perf_events
|
||||
(aka the "perf" command), on older kernels where perf_events does not have
|
||||
@@ -22,6 +22,24 @@ Linux perf_events: add linux-tools-common, run "perf", then add any additional
|
||||
packages it requests. Also uses awk.
|
||||
.SH OPTIONS
|
||||
.TP
|
||||
\-h
|
||||
Usage message.
|
||||
.TP
|
||||
\-b buckets
|
||||
Specify a list of bucket points for the histogram as a string (eg, "10 500
|
||||
1000"). The histogram will include buckets for less-than the minimum, and
|
||||
greater-than-or-equal-to the maximum. If a single value is specified, two
|
||||
statistics only are gathered: for less-than and for greater-than-or-equal-to.
|
||||
The overhead is relative to the number of buckets, so only specifying a
|
||||
single value costs the lowest overhead.
|
||||
.TP
|
||||
\-P power
|
||||
Power for power-of histogram. By default, a power-of-4 histogram is created.
|
||||
This and the \-b option are exclusive.
|
||||
.TP
|
||||
\-m max
|
||||
Max value for power-of histograms.
|
||||
.TP
|
||||
tracepoint
|
||||
Tracepoint specification. Eg, syscalls:sys_enter_read.
|
||||
.TP
|
||||
@@ -35,19 +53,31 @@ Number of seconds to trace. If not specified, this runs until Ctrl-C.
|
||||
.TP
|
||||
Trace read() syscalls until Ctrl-C, and show histogram of requested size:
|
||||
#
|
||||
.B perf-stat-hist syscalls:sys_enter_read count
|
||||
.B perf\-stat\-hist syscalls:sys_enter_read count
|
||||
.TP
|
||||
Trace read() syscall completions until Ctrl-C, and show histogram of successful returned size:
|
||||
#
|
||||
.B perf-stat-hist syscalls:sys_exit_read count
|
||||
.B perf\-stat\-hist syscalls:sys_exit_read ret
|
||||
.TP
|
||||
Trace read() return sizes for 10 seconds, showing histogram:
|
||||
#
|
||||
.B perf-stat-hist syscalls:sys_exit_read count 10
|
||||
.B perf\-stat\-hist syscalls:sys_exit_read ret 10
|
||||
.TP
|
||||
Trace network transmits until Ctrl-C, and show histogram of packet size:
|
||||
#
|
||||
.B perf-stat-hist net:net_dev_xmit len
|
||||
.B perf\-stat\-hist net:net_dev_xmit len
|
||||
.TP
|
||||
Trace read() return sizes, using a power-of-10 histogram:
|
||||
.B perf\-stat\-hist \-P 10 syscalls:sys_exit_read ret
|
||||
.TP
|
||||
Trace read() return sizes, using a power-of-2 histogram, and a max of 1024:
|
||||
.B perf\-stat\-hist \-P 2 \-m 1024 syscalls:sys_exit_read ret
|
||||
.TP
|
||||
Trace read() return sizes, using the specified bucket points:
|
||||
.B perf\-stat\-hist \-b """10 50 100 5000""" syscalls:sys_exit_read ret
|
||||
.TP
|
||||
Trace read() return sizes, and bifurcate statistics by the value 10:
|
||||
.B perf-stat-hist \-b 10 syscalls:sys_exit_read ret
|
||||
.SH FIELDS
|
||||
.TP
|
||||
Range
|
||||
@@ -61,7 +91,9 @@ ASCII histogram representation of the Count column.
|
||||
.SH OVERHEAD
|
||||
While the counts are performed in-kernel, there is one tracepoint used per
|
||||
histogram bucket, so the overheads are higher than usual (relative to the
|
||||
number of buckets) than function counting using perf stat.
|
||||
number of buckets) than function counting using perf stat. The lowest
|
||||
overhead is when \-b is used to specify one bucket only, bifurcating
|
||||
statistics.
|
||||
.SH SOURCE
|
||||
This is from the perf-tools collection.
|
||||
.IP
|
||||
|
||||
@@ -5,7 +5,8 @@
|
||||
# This is a proof-of-concept showing in-kernel histograms using Linux
|
||||
# perf_events (aka the "perf" command).
|
||||
#
|
||||
# USAGE: ./perf-stat-hist tracepoint variable [seconds]
|
||||
# USAGE: perf-stat-hist [-h] [-b buckets|-P power] [-m max] tracepoint
|
||||
# variable [seconds]
|
||||
# eg,
|
||||
# ./perf-stat-hist syscalls:sys_enter_read count 10
|
||||
#
|
||||
@@ -38,50 +39,117 @@
|
||||
#
|
||||
# 30-Jun-2014 Brendan Gregg Created this.
|
||||
|
||||
if (( $# < 2 )); then
|
||||
echo "USAGE: $0 tracepoint variable [seconds]"
|
||||
echo " eg,"
|
||||
echo " $0 syscalls:sys_enter_read count 10"
|
||||
exit
|
||||
fi
|
||||
opt_buckets=0; opt_power=0; power=4; opt_max=0; max=$((1024 * 1024))
|
||||
duration=0; debug=0
|
||||
trap ':' INT QUIT TERM PIPE
|
||||
|
||||
function usage {
|
||||
cat <<-END >&2
|
||||
USAGE: perf-stat-hist [-h] [-b buckets|-P power] [-m max] tracepoint
|
||||
variable [seconds]
|
||||
-b buckets # specify histogram bucket points
|
||||
-P power # power-of (default is 4)
|
||||
-m max # max value for power-of
|
||||
-h # this usage message
|
||||
eg,
|
||||
perf-stat-hist syscalls:sys_enter_read count 5
|
||||
# read() request histogram, 5 seconds
|
||||
perf-stat-hist syscalls:sys_exit_read ret 5
|
||||
# read() return histogram, 5 seconds
|
||||
perf-stat-hist -P 10 syscalls:sys_exit_read ret 5
|
||||
# ... use power-of-10
|
||||
perf-stat-hist -P 2 -m 1024 syscalls:sys_exit_read ret 5
|
||||
# ... use power-of-2, max 1024
|
||||
perf-stat-hist -b "10 50 100 500" syscalls:sys_exit_read ret 5
|
||||
# ... histogram based on these bucket ranges
|
||||
perf-stat-hist -b 10 syscalls:sys_exit_read ret 5
|
||||
# ... bifurcate by the value 10 (lowest overhead)
|
||||
END
|
||||
exit
|
||||
}
|
||||
|
||||
function die {
|
||||
echo >&2 "$@"
|
||||
exit 1
|
||||
}
|
||||
|
||||
### process options
|
||||
while getopts b:hm:P: opt
|
||||
do
|
||||
case $opt in
|
||||
b) opt_buckets=1; buckets=($OPTARG) ;;
|
||||
P) opt_power=1; power=$OPTARG ;;
|
||||
m) opt_max=1; max=$OPTARG ;;
|
||||
h|?) usage ;;
|
||||
esac
|
||||
done
|
||||
shift $(( $OPTIND - 1 ))
|
||||
(( $# < 2 )) && usage
|
||||
tpoint=$1 # tracepoint
|
||||
var=$2 # variable for histogram
|
||||
duration=${3:-5}
|
||||
duration=${3}
|
||||
|
||||
pow=4 # power-of for bucket size
|
||||
max=$(( 1024 * 1024 )) # intended max bucket value
|
||||
debug=0
|
||||
### option logic
|
||||
(( opt_buckets && opt_power )) && die "ERROR: use either -b or -P"
|
||||
(( opt_power && power < 2 )) && die "ERROR: -P power must be 2 or higher"
|
||||
|
||||
### check that tracepoint exists
|
||||
if ! grep "^$tpoint\$" /sys/kernel/debug/tracing/available_events >/dev/null
|
||||
if ! grep "^$tpoint\$" /sys/kernel/debug/tracing/available_events > /dev/null
|
||||
then
|
||||
echo >&2 "ERROR: tracepoint \"$tpoint\" not found. Exiting..."
|
||||
[[ "$USER" != "root" ]] && echo >&2 "Not root user?"
|
||||
exit
|
||||
exit 1
|
||||
fi
|
||||
|
||||
### auto build power-of buckets
|
||||
if (( !opt_buckets )); then
|
||||
b=0
|
||||
s=1;
|
||||
while (( s <= max )); do
|
||||
b="$b $s"
|
||||
(( s *= power ))
|
||||
done
|
||||
buckets=($b)
|
||||
fi
|
||||
|
||||
### build list of tracepoints and filters for each histogram bucket
|
||||
tpoints=$(
|
||||
echo ' -e' $tpoint --filter '"'$var' == 0"'
|
||||
s=1;
|
||||
while (( s < max )); do
|
||||
echo ' -e' $tpoint --filter '"'$var' >= '$s' && '\
|
||||
$var' < '$((s * pow))'"'
|
||||
(( s *= pow ))
|
||||
done
|
||||
echo ' -e' $tpoint --filter '"'$var' >= '$s'"'
|
||||
)
|
||||
max=${buckets[${#buckets[@]} - 1]} # last element
|
||||
((max_i = ${#buckets[*]} - 1))
|
||||
tpoints=" -e $tpoint --filter \"$var < ${buckets[0]}\""
|
||||
awkarray=
|
||||
i=0;
|
||||
while (( i < max_i )); do
|
||||
tpoints="$tpoints -e $tpoint --filter \"$var >= ${buckets[$i]} && "
|
||||
tpoints="$tpoints $var < ${buckets[$i + 1]}\""
|
||||
awkarray="$awkarray buckets[$i]=${buckets[$i]};"
|
||||
(( i++ ))
|
||||
done
|
||||
awkarray="$awkarray buckets[$max_i]=${buckets[$max_i]};"
|
||||
tpoints="$tpoints -e $tpoint --filter \"$var >= ${buckets[$max_i]}\""
|
||||
|
||||
if (( debug )); then
|
||||
echo buckets: ${buckets[*]}
|
||||
echo tracepoints: $tpoints
|
||||
echo awkarray: ${awkarray[*]}
|
||||
fi
|
||||
|
||||
echo "Tracing $tpoint, power-of-$pow, max $max, for $duration seconds..."
|
||||
### prepare to run
|
||||
if (( duration )); then
|
||||
etext="for $duration seconds"
|
||||
cmd="sleep $duration"
|
||||
else
|
||||
etext="until Ctrl-C"
|
||||
cmd="sleep 999999"
|
||||
fi
|
||||
if (( opt_buckets )); then
|
||||
echo "Tracing $tpoint, specified buckets, $etext..."
|
||||
else
|
||||
echo "Tracing $tpoint, power-of-$power, max $max, $etext..."
|
||||
fi
|
||||
|
||||
### run perf
|
||||
out="-o /dev/stdout" # a workaround needed in linux 3.2; not by 3.4.15
|
||||
stat=$(eval perf stat $tpoints -a $out sleep $duration 2>&1)
|
||||
stat=$(eval perf stat $tpoints -a $out $cmd 2>&1)
|
||||
|
||||
if (( $? != 0 )); then
|
||||
echo >&2 "ERROR running perf:"
|
||||
@@ -103,35 +171,38 @@ most=$(echo "$stat" | awk -v tpoint=$tpoint '
|
||||
|
||||
### process output
|
||||
echo
|
||||
echo "$stat" | awk -v tpoint=$tpoint -v max=$max -v pow=$pow -v most=$most '
|
||||
echo "$stat" | awk -v tpoint=$tpoint -v max_i=$max_i -v most=$most '
|
||||
function star(sval, smax, swidth) {
|
||||
stars = ""
|
||||
if (smax == 0) return ""
|
||||
for (i = 0; i < (swidth * sval / smax); i++) {
|
||||
for (si = 0; si < (swidth * sval / smax); si++) {
|
||||
stars = stars "#"
|
||||
}
|
||||
return stars
|
||||
}
|
||||
BEGIN {
|
||||
'"$awkarray"'
|
||||
printf(" %-15s: %-8s %s\n", "Range", "Count",
|
||||
"Distribution")
|
||||
}
|
||||
/Performance counter stats/ { s = 0 }
|
||||
/Performance counter stats/ { i = -1 }
|
||||
# reverse order of rule set is important
|
||||
{ ok = 0 }
|
||||
$2 == tpoint { num = $1; gsub(/,/, "", num); ok = 1 }
|
||||
ok && s > 0 && s >= max {
|
||||
printf(" %10d -> %-10s: %-8s |%-38s|\n", s, "", num,
|
||||
star(num, most, 38))
|
||||
ok && i >= max_i {
|
||||
printf(" %10d -> %-10s: %-8s |%-38s|\n", buckets[i],
|
||||
"", num, star(num, most, 38))
|
||||
next
|
||||
}
|
||||
ok && s > 0 && s < max {
|
||||
printf(" %10d -> %-10d: %-8s |%-38s|\n", s, s * pow - 1, num,
|
||||
star(num, most, 38))
|
||||
s *= pow
|
||||
ok && i >= 0 && i < max_i {
|
||||
printf(" %10d -> %-10d: %-8s |%-38s|\n", buckets[i],
|
||||
buckets[i+1] - 1, num, star(num, most, 38))
|
||||
i++
|
||||
next
|
||||
}
|
||||
ok && s == 0 {
|
||||
printf(" %10d %-10s: %-8s |%-38s|\n", 0, "", num,
|
||||
star(num, most, 38))
|
||||
s = 1
|
||||
ok && i == -1 {
|
||||
printf(" %10s -> %-10d: %-8s |%-38s|\n", "",
|
||||
buckets[0] - 1, num, star(num, most, 38))
|
||||
i++
|
||||
}
|
||||
'
|
||||
|
||||
Reference in New Issue
Block a user