perf-stat-hist custom histograms

2025-11-30 23:16:03 +07:00 · 2014-07-22 17:10:44 -07:00
parent b1a16b11fb
commit 5e72e166ff
4 changed files with 196 additions and 46 deletions
--- a/examples/perf-stat-hist_example.txt
+++ b/examples/perf-stat-hist_example.txt
@@ -73,3 +73,50 @@ Tracing syscalls:sys_exit_read, power-of-4, max 1048576, for 10 seconds...
      1048576 ->           : 0        |                                      |

 Most of our read()s were tiny, between 1 and 3 bytes.
+
+
+Using power-of-2, and a max of 1024:
+
+# ./perf-stat-hist -P 2 -m 1024 syscalls:sys_exit_read ret
+Tracing syscalls:sys_exit_read, power-of-2, max 1024, until Ctrl-C...
+^C
+            Range          : Count    Distribution
+              -> -1        : 29       |##                                    |
+            0 -> 0         : 1        |#                                     |
+            1 -> 1         : 959      |######################################|
+            2 -> 3         : 1        |#                                     |
+            4 -> 7         : 0        |                                      |
+            8 -> 15        : 2        |#                                     |
+           16 -> 31        : 14       |#                                     |
+           32 -> 63        : 1        |#                                     |
+           64 -> 127       : 0        |                                      |
+          128 -> 255       : 0        |                                      |
+          256 -> 511       : 0        |                                      |
+          512 -> 1023      : 1        |#                                     |
+         1024 ->           : 1        |#                                     |
+
+
+Specifying custom bucket sizes:
+
+# ./perf-stat-hist -b "10 50 100 5000" syscalls:sys_exit_read ret
+Tracing syscalls:sys_exit_read, specified buckets, until Ctrl-C...
+^C
+            Range          : Count    Distribution
+              -> 9         : 989      |######################################|
+           10 -> 49        : 5        |#                                     |
+           50 -> 99        : 0        |                                      |
+          100 -> 4999      : 2        |#                                     |
+         5000 ->           : 0        |                                      |
+
+
+Specifying a single value to bifurcate statistics:
+
+# ./perf-stat-hist -b 10 syscalls:sys_exit_read ret
+Tracing syscalls:sys_exit_read, specified buckets, until Ctrl-C...
+^C
+            Range          : Count    Distribution
+              -> 9         : 2959     |######################################|
+           10 ->           : 7        |#                                     |
+
+This has the lowest overhead for collection, since only two tracepoint 
+filter pairs are used.
--- a/man/man8/kprobe.8
+++ b/man/man8/kprobe.8
@@ -75,7 +75,7 @@ Trace do_sys_open() with filename string:
 .TP
 Trace do_sys_open() for filenames ending in "stat"
 #
-.B kprobe 'p:myopen do_sys_open fn=+0(%si):string' 'fn ~ "*stat"'
+.B kprobe 'p:myopen do_sys_open fn=+0(%si):string' 'fn ~ """*stat"""'
 .SH FIELDS
 The output format depends on the kernel version, and headings can be printed
 using \-H. The format is the same as the ftrace function trace format, described
--- a/man/man8/perf-stat-hist.8
+++ b/man/man8/perf-stat-hist.8
@@ -3,7 +3,7 @@
 perf-stat-hist \- histogram summary of tracepoint values. Uses Linux perf_events.
 .SH SYNOPSIS
 .B perf-stat-hist
-tracepoint variable [seconds]
+[-h] [-b buckets|-P power] [-m max] tracepoint variable [seconds]
 .SH DESCRIPTION
 This is a proof-of-concept showing in-kernel histograms using Linux perf_events
 (aka the "perf" command), on older kernels where perf_events does not have
@@ -22,6 +22,24 @@ Linux perf_events: add linux-tools-common, run "perf", then add any additional
 packages it requests. Also uses awk.
 .SH OPTIONS
 .TP
+\-h
+Usage message.
+.TP
+\-b buckets
+Specify a list of bucket points for the histogram as a string (eg, "10 500
+1000"). The histogram will include buckets for less-than the minimum, and
+greater-than-or-equal-to the maximum.  If a single value is specified, two
+statistics only are gathered: for less-than and for greater-than-or-equal-to.
+The overhead is relative to the number of buckets, so only specifying a
+single value costs the lowest overhead.
+.TP
+\-P power
+Power for power-of histogram. By default, a power-of-4 histogram is created.
+This and the \-b option are exclusive.
+.TP
+\-m max
+Max value for power-of histograms.
+.TP
 tracepoint
 Tracepoint specification. Eg, syscalls:sys_enter_read.
 .TP
@@ -35,19 +53,31 @@ Number of seconds to trace. If not specified, this runs until Ctrl-C.
 .TP
 Trace read() syscalls until Ctrl-C, and show histogram of requested size:
 #
-.B perf-stat-hist syscalls:sys_enter_read count
+.B perf\-stat\-hist syscalls:sys_enter_read count
 .TP
 Trace read() syscall completions until Ctrl-C, and show histogram of successful returned size:
 #
-.B perf-stat-hist syscalls:sys_exit_read count
+.B perf\-stat\-hist syscalls:sys_exit_read ret
 .TP
 Trace read() return sizes for 10 seconds, showing histogram:
 #
-.B perf-stat-hist syscalls:sys_exit_read count 10
+.B perf\-stat\-hist syscalls:sys_exit_read ret 10
 .TP
 Trace network transmits until Ctrl-C, and show histogram of packet size:
 #
-.B perf-stat-hist net:net_dev_xmit len
+.B perf\-stat\-hist net:net_dev_xmit len
+.TP
+Trace read() return sizes, using a power-of-10 histogram:
+.B perf\-stat\-hist \-P 10 syscalls:sys_exit_read ret
+.TP
+Trace read() return sizes, using a power-of-2 histogram, and a max of 1024:
+.B perf\-stat\-hist \-P 2 \-m 1024 syscalls:sys_exit_read ret
+.TP
+Trace read() return sizes, using the specified bucket points:
+.B perf\-stat\-hist \-b """10 50 100 5000""" syscalls:sys_exit_read ret
+.TP
+Trace read() return sizes, and bifurcate statistics by the value 10:
+.B perf-stat-hist \-b 10 syscalls:sys_exit_read ret
 .SH FIELDS
 .TP
 Range
@@ -61,7 +91,9 @@ ASCII histogram representation of the Count column.
 .SH OVERHEAD
 While the counts are performed in-kernel, there is one tracepoint used per
 histogram bucket, so the overheads are higher than usual (relative to the
-number of buckets) than function counting using perf stat.
+number of buckets) than function counting using perf stat. The lowest
+overhead is when \-b is used to specify one bucket only, bifurcating
+statistics.
 .SH SOURCE
 This is from the perf-tools collection.
 .IP
--- a/misc/perf-stat-hist
+++ b/misc/perf-stat-hist
@@ -5,7 +5,8 @@
 # This is a proof-of-concept showing in-kernel histograms using Linux
 # perf_events (aka the "perf" command).
 #
-# USAGE: ./perf-stat-hist tracepoint variable [seconds]
+# USAGE: perf-stat-hist [-h] [-b buckets|-P power] [-m max] tracepoint
+#        variable [seconds]
 #    eg,
 #        ./perf-stat-hist syscalls:sys_enter_read count 10  
 #
@@ -38,50 +39,117 @@
 #
 # 30-Jun-2014	Brendan Gregg	Created this.

-if (( $# < 2 )); then
-	echo "USAGE: $0 tracepoint variable [seconds]"
-	echo "   eg,"
-	echo "       $0 syscalls:sys_enter_read count 10"
-	exit
-fi
+opt_buckets=0; opt_power=0; power=4; opt_max=0; max=$((1024 * 1024))
+duration=0; debug=0
+trap ':' INT QUIT TERM PIPE

+function usage {
+	cat <<-END >&2
+	USAGE: perf-stat-hist [-h] [-b buckets|-P power] [-m max] tracepoint
+	                      variable [seconds]
+	                 -b buckets      # specify histogram bucket points
+	                 -P power        # power-of (default is 4)
+	                 -m max          # max value for power-of
+	                 -h              # this usage message
+	   eg,
+	       perf-stat-hist syscalls:sys_enter_read count 5
+	                 # read() request histogram, 5 seconds
+	       perf-stat-hist syscalls:sys_exit_read ret 5
+	                 # read() return histogram, 5 seconds
+	       perf-stat-hist -P 10 syscalls:sys_exit_read ret 5
+	                 # ... use power-of-10
+	       perf-stat-hist -P 2 -m 1024 syscalls:sys_exit_read ret 5
+	                 # ... use power-of-2, max 1024
+	       perf-stat-hist -b "10 50 100 500" syscalls:sys_exit_read ret 5
+	                 # ... histogram based on these bucket ranges
+	       perf-stat-hist -b 10 syscalls:sys_exit_read ret 5
+	                 # ... bifurcate by the value 10 (lowest overhead)
+END
+	exit
+}
+
+function die {
+	echo >&2 "$@"
+	exit 1
+}
+
+### process options
+while getopts b:hm:P: opt
+do
+	case $opt in
+	b)	opt_buckets=1; buckets=($OPTARG) ;;
+	P)	opt_power=1; power=$OPTARG ;;
+	m)	opt_max=1; max=$OPTARG ;;
+	h|?)	usage ;;
+	esac
+done
+shift $(( $OPTIND - 1 ))
+(( $# < 2 )) && usage
 tpoint=$1			# tracepoint
 var=$2				# variable for histogram
-duration=${3:-5}
+duration=${3}

-pow=4				# power-of for bucket size
-max=$(( 1024 * 1024 ))		# intended max bucket value
-debug=0
+### option logic
+(( opt_buckets && opt_power )) && die "ERROR: use either -b or -P"
+(( opt_power && power < 2 )) && die "ERROR: -P power must be 2 or higher"

 ### check that tracepoint exists
-if ! grep "^$tpoint\$" /sys/kernel/debug/tracing/available_events >/dev/null
+if ! grep "^$tpoint\$" /sys/kernel/debug/tracing/available_events > /dev/null
 then
 	echo >&2 "ERROR: tracepoint \"$tpoint\" not found. Exiting..."
 	[[ "$USER" != "root" ]] && echo >&2 "Not root user?"
-	exit
+	exit 1
+fi
+
+### auto build power-of buckets
+if (( !opt_buckets )); then
+	b=0
+	s=1;
+	while (( s <= max )); do
+		b="$b $s"
+		(( s *= power ))
+	done
+	buckets=($b)
 fi

 ### build list of tracepoints and filters for each histogram bucket
-tpoints=$(
-	echo ' -e' $tpoint --filter '"'$var' == 0"'
-	s=1;
-	while (( s < max )); do
-		echo ' -e' $tpoint --filter '"'$var' >= '$s' && '\
-		    $var' < '$((s * pow))'"'
-		(( s *= pow ))
-	done
-	echo ' -e' $tpoint --filter '"'$var' >= '$s'"'
-)
+max=${buckets[${#buckets[@]} - 1]}	# last element
+((max_i = ${#buckets[*]} - 1))
+tpoints=" -e $tpoint --filter \"$var < ${buckets[0]}\""
+awkarray=
+i=0;
+while (( i < max_i )); do
+	tpoints="$tpoints -e $tpoint --filter \"$var >= ${buckets[$i]} && "
+	tpoints="$tpoints $var < ${buckets[$i + 1]}\""
+	awkarray="$awkarray buckets[$i]=${buckets[$i]};"
+	(( i++ ))
+done
+awkarray="$awkarray buckets[$max_i]=${buckets[$max_i]};"
+tpoints="$tpoints -e $tpoint --filter \"$var >= ${buckets[$max_i]}\""

 if (( debug )); then
+	echo buckets: ${buckets[*]}
 	echo tracepoints: $tpoints
+	echo awkarray: ${awkarray[*]}
 fi

-echo "Tracing $tpoint, power-of-$pow, max $max, for $duration seconds..."
+### prepare to run
+if (( duration )); then
+	etext="for $duration seconds"
+	cmd="sleep $duration"
+else
+	etext="until Ctrl-C"
+	cmd="sleep 999999"
+fi
+if (( opt_buckets )); then
+	echo "Tracing $tpoint, specified buckets, $etext..."
+else
+	echo "Tracing $tpoint, power-of-$power, max $max, $etext..."
+fi

 ### run perf
 out="-o /dev/stdout"	# a workaround needed in linux 3.2; not by 3.4.15
-stat=$(eval perf stat $tpoints -a $out sleep $duration 2>&1)
+stat=$(eval perf stat $tpoints -a $out $cmd 2>&1)

 if (( $? != 0 )); then
 	echo >&2 "ERROR running perf:"
@@ -103,35 +171,38 @@ most=$(echo "$stat" | awk -v tpoint=$tpoint '

 ### process output
 echo
-echo "$stat" | awk -v tpoint=$tpoint -v max=$max -v pow=$pow -v most=$most '
+echo "$stat" | awk -v tpoint=$tpoint -v max_i=$max_i -v most=$most '
 	function star(sval, smax, swidth) {
 		stars = ""
 		if (smax == 0) return ""
-		for (i = 0; i < (swidth * sval / smax); i++) {
+		for (si = 0; si < (swidth * sval / smax); si++) {
 			stars = stars "#"
 		}
 		return stars
 	}
 	BEGIN {
+		'"$awkarray"'
 		printf("            %-15s: %-8s %s\n", "Range", "Count",
 		    "Distribution")
 	}
-	/Performance counter stats/ { s = 0 }
+	/Performance counter stats/ { i = -1 }
 	# reverse order of rule set is important
 	{ ok = 0 }
 	$2 == tpoint { num = $1; gsub(/,/, "", num); ok = 1 }
-	ok && s > 0 && s >= max {
-		printf("   %10d -> %-10s: %-8s |%-38s|\n", s, "", num,
-		    star(num, most, 38))
+	ok && i >= max_i {
+		printf("   %10d -> %-10s: %-8s |%-38s|\n", buckets[i],
+		    "", num, star(num, most, 38))
+		next
 	}
-	ok && s > 0 && s < max {
-		printf("   %10d -> %-10d: %-8s |%-38s|\n", s, s * pow - 1, num,
-		    star(num, most, 38))
-		s *= pow
+	ok && i >= 0 && i < max_i {
+		printf("   %10d -> %-10d: %-8s |%-38s|\n", buckets[i],
+		    buckets[i+1] - 1, num, star(num, most, 38))
+		i++
+		next
 	}
-	ok && s == 0 {
-		printf("   %10d    %-10s: %-8s |%-38s|\n", 0, "", num,
-		    star(num, most, 38))
-		s = 1
+	ok && i == -1 {
+		printf("   %10s -> %-10d: %-8s |%-38s|\n", "",
+		    buckets[0] - 1, num, star(num, most, 38))
+		i++
 	}
 '