improve error path, and minor adjustments.

This commit is contained in:
Brendan Gregg
2014-09-08 15:00:58 -07:00
parent e5ef86c467
commit 83c84e78c9
3 changed files with 101 additions and 53 deletions

View File

@@ -65,6 +65,24 @@ This looks like a fast retransmit (inclusion of tcp_fastretrans_alert(), and
being based on receiving an ACK, rather than a timer). being based on receiving an ACK, rather than a timer).
The -l option will include TCP tail loss probe events (TLP; see
http://lwn.net/Articles/542642/). Eg:
# ./tcpretrans -l
TIME PID LADDR:LPORT -- RADDR:RPORT STATE
21:56:06 0 10.100.155.200:22 R> 10.10.237.72:18554 LAST_ACK
21:56:08 0 10.100.155.200:22 R> 10.10.237.72:18554 LAST_ACK
21:56:10 16452 10.100.155.200:22 R> 10.10.237.72:18554 LAST_ACK
21:56:10 0 10.100.155.200:22 L> 10.10.237.72:46408 LAST_ACK
21:56:10 0 10.100.155.200:22 R> 10.10.237.72:46408 LAST_ACK
21:56:12 0 10.100.155.200:22 R> 10.10.237.72:46408 LAST_ACK
21:56:13 0 10.100.155.200:22 R> 10.10.237.72:46408 LAST_ACK
^C
Ending tracing...
Look for "L>" in the type column ("--") for TLP events.
Use -h to print the USAGE message: Use -h to print the USAGE message:
# ./tcpretrans -h # ./tcpretrans -h

View File

@@ -19,7 +19,12 @@ modifications to work on different kernels and platforms. This would be better
written using a tracer such as SystemTap, and will likely be rewritten in the written using a tracer such as SystemTap, and will likely be rewritten in the
future when certain tracing features are added to the Linux kernel. future when certain tracing features are added to the Linux kernel.
Currently only IPv4 is supported. When \-l is used, this also uses dynamic tracing of tcp_send_loss_probe() and
a register.
Currently only IPv4 is supported, on x86_64. If you try this on a different
architecture, you'll likely need to adjust the register locations (search
for %di).
Since this uses ftrace, only the root user can use this tool. Since this uses ftrace, only the root user can use this tool.
.SH REQUIREMENTS .SH REQUIREMENTS
@@ -34,7 +39,7 @@ Print usage message.
\-s \-s
Include kernel stack traces. Include kernel stack traces.
.TP .TP
\-p \-l
Include TCP tail loss probes. Include TCP tail loss probes.
.SH EXAMPLES .SH EXAMPLES
.TP .TP
@@ -56,7 +61,7 @@ LPORT
Local port. Local port.
.TP .TP
\-\- \-\-
Packet type: "R>" for retransmit. Packet type: "R>" for retransmit, and "L>" for tail loss probe.
.TP .TP
RADDR RADDR
Remote address. Remote address.
@@ -66,9 +71,6 @@ Remote port.
.TP .TP
STATE STATE
TCP session state. TCP session state.
.TP
TLP
Tail loss probe: "Y/N".
.SH OVERHEAD .SH OVERHEAD
The CPU overhead is relative to the rate of TCP retransmits, and is The CPU overhead is relative to the rate of TCP retransmits, and is
designed to be low as this does not examine every packet. Once per second the designed to be low as this does not examine every packet. Once per second the

View File

@@ -8,10 +8,11 @@
# sent by the kernel on timeouts). To keep overhead low, only # sent by the kernel on timeouts). To keep overhead low, only
# tcp_retransmit_skb() calls are traced (this does not trace every packet). # tcp_retransmit_skb() calls are traced (this does not trace every packet).
# #
# USAGE: ./tcpretrans [-hsp] # USAGE: ./tcpretrans [-hls]
# #
# REQUIREMENTS: FTRACE and KPROBE CONFIG, tcp_retransmit_skb() kernel function. # REQUIREMENTS: FTRACE and KPROBE CONFIG, tcp_retransmit_skb() kernel function,
# You may have these already have these on recent kernels. And Perl. # and tcp_send_loss_probe() when -l is used. You may have these already have
# these on recent kernels. And Perl.
# #
# This was written as a proof of concept for ftrace, for older Linux systems, # This was written as a proof of concept for ftrace, for older Linux systems,
# and without kernel debuginfo. It uses dynamic tracing of tcp_retransmit_skb(), # and without kernel debuginfo. It uses dynamic tracing of tcp_retransmit_skb(),
@@ -21,7 +22,12 @@
# written using a tracer such as SystemTap, and will likely be rewritten in the # written using a tracer such as SystemTap, and will likely be rewritten in the
# future when certain tracing features are added to the Linux kernel. # future when certain tracing features are added to the Linux kernel.
# #
# Currently only IPv4 is supported. # When -l is used, this also uses dynamic tracing of tcp_send_loss_probe() and
# a register.
#
# Currently only IPv4 is supported, on x86_64. If you try this on a different
# architecture, you'll likely need to adjust the register locations (search
# for %di).
# #
# OVERHEAD: The CPU overhead is relative to the rate of TCP retransmits, and is # OVERHEAD: The CPU overhead is relative to the rate of TCP retransmits, and is
# designed to be low as this does not examine every packet. Once per second the # designed to be low as this does not examine every packet. Once per second the
@@ -70,25 +76,34 @@ $| = 1;
my ($help, $stacks, $tlp); my ($help, $stacks, $tlp);
GetOptions("help|h" => \$help, GetOptions("help|h" => \$help,
"stacks|s" => \$stacks, "stacks|s" => \$stacks,
"tlp|p" => \$tlp) "tlp|l" => \$tlp)
or usage(); or usage();
usage() if $help; usage() if $help;
sub usage { sub usage {
print STDERR "USAGE: tcpretrans [-hsp]\n"; print STDERR "USAGE: tcpretrans [-hls]\n";
print STDERR " -h # help message\n"; print STDERR " -h # help message\n";
print STDERR " -s # print stack traces\n"; print STDERR " -l # trace TCP tail loss probes\n";
print STDERR " -p # trace TCP tail loss probes\n"; print STDERR " -s # print stack traces\n";
print STDERR " eg,\n"; print STDERR " eg,\n";
print STDERR " tcpretrans # trace TCP retransmits\n"; print STDERR " tcpretrans # trace TCP retransmits\n";
exit; exit;
} }
# delete lock and die
sub ldie { sub ldie {
unlink $flock; unlink $flock;
die @_; die @_;
} }
# end tracing (silently) and die
sub edie {
print STDERR "@_\n";
close STDOUT;
close STDERR;
cleanup();
}
sub writeto { sub writeto {
my ($string, $file) = @_; my ($string, $file) = @_;
open FILE, ">$file" or return 0; open FILE, ">$file" or return 0;
@@ -103,6 +118,23 @@ sub appendto {
close FILE or return 0; close FILE or return 0;
} }
# kprobe functions
sub create_kprobe {
my ($kname, $kval) = @_;
appendto "p:$kname $kval", "kprobe_events" or return 0;
}
sub enable_kprobe {
my ($kname) = @_;
writeto "1", "events/kprobes/$kname/enable" or return 0;
}
sub remove_kprobe {
my ($kname) = @_;
writeto "0", "events/kprobes/$kname/enable" or return 0;
appendto "-:$kname", "kprobe_events" or return 0;
}
# tcp socket cache # tcp socket cache
my %tcp; my %tcp;
sub cache_tcp { sub cache_tcp {
@@ -142,27 +174,6 @@ sub inet_h2a {
return join(".", @addr); return join(".", @addr);
} }
sub create_kprobe {
my ($kname, $kval) = @_;
appendto "p:$kname $kval", "kprobe_events"
or ldie "ERROR: creating kprobe for $kname.";
}
sub enable_kprobe {
my ($kname) = @_;
unless (writeto "1", "events/kprobes/$kname/enable") {
appendto "-:$kname", "kprobe_events";
ldie "ERROR: enabling kprobe.";
}
}
sub remove_kprobe {
my ($kname) = @_;
writeto "0", "events/kprobes/$kname/enable"
or ldie "ERROR: disabling kprobe $kname";
appendto "-:$kname", "kprobe_events";
}
### check permissions ### check permissions
chdir "$tracing" or die "ERROR: accessing tracing. Root? Kernel has FTRACE?" . chdir "$tracing" or die "ERROR: accessing tracing. Root? Kernel has FTRACE?" .
"\ndebugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)"; "\ndebugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)";
@@ -174,20 +185,32 @@ if (-e $flock) {
} }
writeto "$$", $flock or die "ERROR: unable to write $flock."; writeto "$$", $flock or die "ERROR: unable to write $flock.";
### setup and begin tracing #
# Setup and begin tracing.
# Use of ldie() and edie() ensures that if an error is encountered, the
# kernel is not left in a partially configured state.
#
writeto "nop", "current_tracer" or ldie "ERROR: disabling current_tracer."; writeto "nop", "current_tracer" or ldie "ERROR: disabling current_tracer.";
my ($kname, $kname_tlp) = ("tcpretrans_tcp_retransmit_skb", "tcpretrans_tcp_send_loss_probe"); my $kname_rtr = "tcpretrans_tcp_retransmit_skb";
create_kprobe $kname, "tcp_retransmit_skb sk=%di"; my $kname_tlp = "tcpretrans_tcp_send_loss_probe";
create_kprobe $kname_tlp, "tcp_send_loss_probe sk=%di" if $tlp; create_kprobe $kname_rtr, "tcp_retransmit_skb sk=%di" or
ldie "ERROR: creating kprobe for tcp_retransmit_skb().";;
if ($tlp) {
create_kprobe $kname_tlp, "tcp_send_loss_probe sk=%di" or
edie "ERROR: creating kprobe for tcp_send_loss_probe(). " .
"Older kernel version?";
}
if ($stacks) { if ($stacks) {
writeto "1", "options/stacktrace" or print STDERR "WARNING: " . writeto "1", "options/stacktrace" or print STDERR "WARNING: " .
"unable to enable stacktraces."; "unable to enable stacktraces.\n";
}
enable_kprobe $kname_rtr or edie "ERROR: enabling $kname_rtr probe.";
if ($tlp) {
enable_kprobe $kname_tlp or edie "ERROR: enabling $kname_tlp probe.";
} }
enable_kprobe $kname;
enable_kprobe $kname_tlp if $tlp;
map_tcp_states(); map_tcp_states();
printf "%-8s %-6s %-20s -- %-20s %-12s %s\n", "TIME", "PID", printf "%-8s %-6s %-20s -- %-20s %-12s\n", "TIME", "PID", "LADDR:LPORT",
"LADDR:LPORT", "RADDR:RPORT", "STATE", "TLP"; "RADDR:RPORT", "STATE";
# #
# Read and print event data. This loop waits one second then reads the buffered # Read and print event data. This loop waits one second then reads the buffered
@@ -200,14 +223,14 @@ while (1) {
sleep $interval; sleep $interval;
# buffer trace data # buffer trace data
open TPIPE, "trace" or ldie "ERROR: opening trace_pipe."; open TPIPE, "trace" or edie "ERROR: opening trace_pipe.";
my @trace = (); my @trace = ();
while (<TPIPE>) { while (<TPIPE>) {
next if /^#/; next if /^#/;
push @trace, $_; push @trace, $_;
} }
close TPIPE; close TPIPE;
writeto "0", "trace" or ldie "ERROR: clearing trace"; writeto "0", "trace" or edie "ERROR: clearing trace";
# cache /proc/net/tcp state # cache /proc/net/tcp state
if (scalar @trace) { if (scalar @trace) {
@@ -246,8 +269,9 @@ while (1) {
} }
my $now = strftime "%H:%M:%S", localtime; my $now = strftime "%H:%M:%S", localtime;
printf "%-8s %-6s %-20s R> %-20s %-12s %s\n", $now, $pid, printf "%-8s %-6s %-20s %s> %-20s %-12s\n", $now, $pid,
"$laddr:$lport", "$raddr:$rport", $state, $rest =~ /$kname_tlp/ ? "Y" : "N"; "$laddr:$lport", $rest =~ /$kname_tlp/ ? "L" : "R",
"$raddr:$rport", $state,
} }
} }
@@ -259,10 +283,14 @@ sub cleanup {
close TPIPE; close TPIPE;
if ($stacks) { if ($stacks) {
writeto "0", "options/stacktrace" or print STDERR "WARNING: " . writeto "0", "options/stacktrace" or print STDERR "WARNING: " .
"unable to disable stacktraces."; "unable to disable stacktraces.\n";
}
remove_kprobe $kname_rtr
or print STDERR "ERROR: removing kprobe $kname_rtr\n";
if ($tlp) {
remove_kprobe $kname_tlp
or print STDERR "ERROR: removing kprobe $kname_tlp\n";
} }
remove_kprobe $kname;
remove_kprobe $kname_tlp if $tlp;
writeto "", "trace"; writeto "", "trace";
unlink $flock; unlink $flock;
exit; exit;