#!/usr/bin/perl # # tcpretrans - show TCP retransmts, with address and other details. # Written using Linux ftrace. # # This traces TCP retransmits, showing address, port, and TCP state information, # and sometimes the PID (although usually not, since retransmits are usually # sent by the kernel on timeouts). To keep overhead low, only # tcp_retransmit_skb() calls are traced (this does not trace every packet). # # USAGE: ./tcpretrans [-hls] # # REQUIREMENTS: FTRACE and KPROBE CONFIG, tcp_retransmit_skb() kernel function, # and tcp_send_loss_probe() when -l is used. You may have these already have # these on recent kernels. And Perl. # # This was written as a proof of concept for ftrace, for older Linux systems, # and without kernel debuginfo. It uses dynamic tracing of tcp_retransmit_skb(), # and reads /proc/net/tcp for socket details. Its use of dynamic tracing and # CPU registers is an unstable platform-specific workaround, and may require # modifications to work on different kernels and platforms. This would be better # written using a tracer such as SystemTap, and will likely be rewritten in the # future when certain tracing features are added to the Linux kernel. # # When -l is used, this also uses dynamic tracing of tcp_send_loss_probe() and # a register. # # Currently only IPv4 is supported, on x86_64. If you try this on a different # architecture, you'll likely need to adjust the register locations (search # for %di). # # OVERHEAD: The CPU overhead is relative to the rate of TCP retransmits, and is # designed to be low as this does not examine every packet. Once per second the # /proc/net/tcp file is read, and a buffer of retransmit trace events is # retrieved from the kernel and processed. # # From perf-tools: https://github.com/brendangregg/perf-tools # # See the tcpretrans(8) man page (in perf-tools) for more info. # # COPYRIGHT: Copyright (c) 2014 Brendan Gregg. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # # (http://www.gnu.org/copyleft/gpl.html) # # 28-Jul-2014 Brendan Gregg Created this. use strict; use warnings; use POSIX qw(strftime); use Getopt::Long; my $tracing = "/sys/kernel/debug/tracing"; my $flock = "/var/tmp/.ftrace-lock"; my $interval = 1; local $SIG{INT} = \&cleanup; local $SIG{QUIT} = \&cleanup; local $SIG{TERM} = \&cleanup; local $SIG{PIPE} = \&cleanup; local $SIG{HUP} = \&cleanup; $| = 1; ### options my ($help, $stacks, $tlp); GetOptions("help|h" => \$help, "stacks|s" => \$stacks, "tlp|l" => \$tlp) or usage(); usage() if $help; sub usage { print STDERR "USAGE: tcpretrans [-hls]\n"; print STDERR " -h # help message\n"; print STDERR " -l # trace TCP tail loss probes\n"; print STDERR " -s # print stack traces\n"; print STDERR " eg,\n"; print STDERR " tcpretrans # trace TCP retransmits\n"; exit; } # delete lock and die sub ldie { unlink $flock; die @_; } # end tracing (silently) and die sub edie { print STDERR "@_\n"; close STDOUT; close STDERR; cleanup(); } sub writeto { my ($string, $file) = @_; open FILE, ">$file" or return 0; print FILE $string or return 0; close FILE or return 0; } sub appendto { my ($string, $file) = @_; open FILE, ">>$file" or return 0; print FILE $string or return 0; close FILE or return 0; } # kprobe functions sub create_kprobe { my ($kname, $kval) = @_; appendto "p:$kname $kval", "kprobe_events" or return 0; } sub enable_kprobe { my ($kname) = @_; writeto "1", "events/kprobes/$kname/enable" or return 0; } sub remove_kprobe { my ($kname) = @_; writeto "0", "events/kprobes/$kname/enable" or return 0; appendto "-:$kname", "kprobe_events" or return 0; } # tcp socket cache my %tcp; sub cache_tcp { undef %tcp; open(TCP, "/proc/net/tcp") or ldie "ERROR: reading /proc/net/tcp."; while () { next if /^ *sl/; my ($sl, $local_address, $rem_address, $st, $tx_rx, $tr_tm, $retrnsmt, $uid, $timeout, $inode, $jf, $sk) = split; $sk =~ s/^0x//; $tcp{$sk}{laddr} = $local_address; $tcp{$sk}{raddr} = $rem_address; $tcp{$sk}{state} = $st; } close TCP; } my @tcpstate; sub map_tcp_states { push @tcpstate, "NULL"; for () { chomp; s/.*TCP_//; s/[, ].*$//; push @tcpstate, $_; } } # /proc/net/tcp hex addr to dotted quad decimal sub inet_h2a { my ($haddr) = @_; my @addr = (); for my $num ($haddr =~ /(..)(..)(..)(..)/) { unshift @addr, hex($num); } return join(".", @addr); } ### check permissions chdir "$tracing" or die "ERROR: accessing tracing. Root? Kernel has FTRACE?" . "\ndebugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)"; ### ftrace lock if (-e $flock) { open FLOCK, $flock; my $fpid = ; chomp $fpid; close FLOCK; die "ERROR: ftrace may be in use by PID $fpid ($flock)"; } writeto "$$", $flock or die "ERROR: unable to write $flock."; # # Setup and begin tracing. # Use of ldie() and edie() ensures that if an error is encountered, the # kernel is not left in a partially configured state. # writeto "nop", "current_tracer" or ldie "ERROR: disabling current_tracer."; my $kname_rtr = "tcpretrans_tcp_retransmit_skb"; my $kname_tlp = "tcpretrans_tcp_send_loss_probe"; create_kprobe $kname_rtr, "tcp_retransmit_skb sk=%di" or ldie "ERROR: creating kprobe for tcp_retransmit_skb().";; if ($tlp) { create_kprobe $kname_tlp, "tcp_send_loss_probe sk=%di" or edie "ERROR: creating kprobe for tcp_send_loss_probe(). " . "Older kernel version?"; } if ($stacks) { writeto "1", "options/stacktrace" or print STDERR "WARNING: " . "unable to enable stacktraces.\n"; } enable_kprobe $kname_rtr or edie "ERROR: enabling $kname_rtr probe."; if ($tlp) { enable_kprobe $kname_tlp or edie "ERROR: enabling $kname_tlp probe."; } map_tcp_states(); printf "%-8s %-6s %-20s -- %-20s %-12s\n", "TIME", "PID", "LADDR:LPORT", "RADDR:RPORT", "STATE"; # # Read and print event data. This loop waits one second then reads the buffered # trace data, then caches /proc/net/tcp, then iterates over the buffered trace # data using the cached state. While this minimizes CPU overheads, it only # works because sockets that are retransmitting are usually long lived, and # remain in /proc/net/tcp for at least our sleep interval. # while (1) { sleep $interval; # buffer trace data open TPIPE, "trace" or edie "ERROR: opening trace_pipe."; my @trace = (); while () { next if /^#/; push @trace, $_; } close TPIPE; writeto "0", "trace" or edie "ERROR: clearing trace"; # cache /proc/net/tcp state if (scalar @trace) { cache_tcp(); } # process and print events for (@trace) { if ($stacks && /^ *=>/) { print $_; next; } my ($taskpid, $rest) = split ' ', $_, 2; my ($task, $pid) = $taskpid =~ /(.*)-(\d+)/; my ($skp) = $rest =~ /sk=([0-9a-fx]*)/; next unless defined $skp and $skp ne ""; $skp =~ s/^0x//; my ($laddr, $lport, $raddr, $rport, $state); if (defined $tcp{$skp}) { # convert /proc/net/tcp hex to dotted quads my ($hladdr, $hlport) = split /:/, $tcp{$skp}{laddr}; my ($hraddr, $hrport) = split /:/, $tcp{$skp}{raddr}; $laddr = inet_h2a($hladdr); $raddr = inet_h2a($hraddr); $lport = hex($hlport); $rport = hex($hrport); $state = $tcpstate[hex($tcp{$skp}{state})]; } else { # socket closed too quickly ($laddr, $raddr) = ("-", "-"); ($lport, $rport) = ("-", "-"); $state = "-"; } my $now = strftime "%H:%M:%S", localtime; printf "%-8s %-6s %-20s %s> %-20s %-12s\n", $now, $pid, "$laddr:$lport", $rest =~ /$kname_tlp/ ? "L" : "R", "$raddr:$rport", $state, } } ### end tracing cleanup(); sub cleanup { print "\nEnding tracing...\n"; close TPIPE; if ($stacks) { writeto "0", "options/stacktrace" or print STDERR "WARNING: " . "unable to disable stacktraces.\n"; } remove_kprobe $kname_rtr or print STDERR "ERROR: removing kprobe $kname_rtr\n"; if ($tlp) { remove_kprobe $kname_tlp or print STDERR "ERROR: removing kprobe $kname_tlp\n"; } writeto "", "trace"; unlink $flock; exit; } # from /usr/include/netinet/tcp.h: __DATA__ TCP_ESTABLISHED = 1, TCP_SYN_SENT, TCP_SYN_RECV, TCP_FIN_WAIT1, TCP_FIN_WAIT2, TCP_TIME_WAIT, TCP_CLOSE, TCP_CLOSE_WAIT, TCP_LAST_ACK, TCP_LISTEN, TCP_CLOSING /* now a valid state */