mirror of
https://github.com/brendangregg/perf-tools.git
synced 2025-11-30 23:16:03 +07:00
312 lines
8.8 KiB
Perl
Executable File
312 lines
8.8 KiB
Perl
Executable File
#!/usr/bin/perl
|
|
#
|
|
# tcpretrans - show TCP retransmts, with address and other details.
|
|
# Written using Linux ftrace.
|
|
#
|
|
# This traces TCP retransmits, showing address, port, and TCP state information,
|
|
# and sometimes the PID (although usually not, since retransmits are usually
|
|
# sent by the kernel on timeouts). To keep overhead low, only
|
|
# tcp_retransmit_skb() calls are traced (this does not trace every packet).
|
|
#
|
|
# USAGE: ./tcpretrans [-hls]
|
|
#
|
|
# REQUIREMENTS: FTRACE and KPROBE CONFIG, tcp_retransmit_skb() kernel function,
|
|
# and tcp_send_loss_probe() when -l is used. You may have these already have
|
|
# these on recent kernels. And Perl.
|
|
#
|
|
# This was written as a proof of concept for ftrace, for older Linux systems,
|
|
# and without kernel debuginfo. It uses dynamic tracing of tcp_retransmit_skb(),
|
|
# and reads /proc/net/tcp for socket details. Its use of dynamic tracing and
|
|
# CPU registers is an unstable platform-specific workaround, and may require
|
|
# modifications to work on different kernels and platforms. This would be better
|
|
# written using a tracer such as SystemTap, and will likely be rewritten in the
|
|
# future when certain tracing features are added to the Linux kernel.
|
|
#
|
|
# When -l is used, this also uses dynamic tracing of tcp_send_loss_probe() and
|
|
# a register.
|
|
#
|
|
# Currently only IPv4 is supported, on x86_64. If you try this on a different
|
|
# architecture, you'll likely need to adjust the register locations (search
|
|
# for %di).
|
|
#
|
|
# OVERHEAD: The CPU overhead is relative to the rate of TCP retransmits, and is
|
|
# designed to be low as this does not examine every packet. Once per second the
|
|
# /proc/net/tcp file is read, and a buffer of retransmit trace events is
|
|
# retrieved from the kernel and processed.
|
|
#
|
|
# From perf-tools: https://github.com/brendangregg/perf-tools
|
|
#
|
|
# See the tcpretrans(8) man page (in perf-tools) for more info.
|
|
#
|
|
# COPYRIGHT: Copyright (c) 2014 Brendan Gregg.
|
|
#
|
|
# This program is free software; you can redistribute it and/or
|
|
# modify it under the terms of the GNU General Public License
|
|
# as published by the Free Software Foundation; either version 2
|
|
# of the License, or (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software Foundation,
|
|
# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
#
|
|
# (http://www.gnu.org/copyleft/gpl.html)
|
|
#
|
|
# 28-Jul-2014 Brendan Gregg Created this.
|
|
|
|
use strict;
|
|
use warnings;
|
|
use POSIX qw(strftime);
|
|
use Getopt::Long;
|
|
my $tracing = "/sys/kernel/debug/tracing";
|
|
my $flock = "/var/tmp/.ftrace-lock";
|
|
my $interval = 1;
|
|
local $SIG{INT} = \&cleanup;
|
|
local $SIG{QUIT} = \&cleanup;
|
|
local $SIG{TERM} = \&cleanup;
|
|
local $SIG{PIPE} = \&cleanup;
|
|
local $SIG{HUP} = \&cleanup;
|
|
$| = 1;
|
|
|
|
### options
|
|
my ($help, $stacks, $tlp);
|
|
GetOptions("help|h" => \$help,
|
|
"stacks|s" => \$stacks,
|
|
"tlp|l" => \$tlp)
|
|
or usage();
|
|
usage() if $help;
|
|
|
|
sub usage {
|
|
print STDERR "USAGE: tcpretrans [-hls]\n";
|
|
print STDERR " -h # help message\n";
|
|
print STDERR " -l # trace TCP tail loss probes\n";
|
|
print STDERR " -s # print stack traces\n";
|
|
print STDERR " eg,\n";
|
|
print STDERR " tcpretrans # trace TCP retransmits\n";
|
|
exit;
|
|
}
|
|
|
|
# delete lock and die
|
|
sub ldie {
|
|
unlink $flock;
|
|
die @_;
|
|
}
|
|
|
|
# end tracing (silently) and die
|
|
sub edie {
|
|
print STDERR "@_\n";
|
|
close STDOUT;
|
|
close STDERR;
|
|
cleanup();
|
|
}
|
|
|
|
sub writeto {
|
|
my ($string, $file) = @_;
|
|
open FILE, ">$file" or return 0;
|
|
print FILE $string or return 0;
|
|
close FILE or return 0;
|
|
}
|
|
|
|
sub appendto {
|
|
my ($string, $file) = @_;
|
|
open FILE, ">>$file" or return 0;
|
|
print FILE $string or return 0;
|
|
close FILE or return 0;
|
|
}
|
|
|
|
# kprobe functions
|
|
sub create_kprobe {
|
|
my ($kname, $kval) = @_;
|
|
appendto "p:$kname $kval", "kprobe_events" or return 0;
|
|
}
|
|
|
|
sub enable_kprobe {
|
|
my ($kname) = @_;
|
|
writeto "1", "events/kprobes/$kname/enable" or return 0;
|
|
}
|
|
|
|
sub remove_kprobe {
|
|
my ($kname) = @_;
|
|
writeto "0", "events/kprobes/$kname/enable" or return 0;
|
|
appendto "-:$kname", "kprobe_events" or return 0;
|
|
}
|
|
|
|
# tcp socket cache
|
|
my %tcp;
|
|
sub cache_tcp {
|
|
undef %tcp;
|
|
open(TCP, "/proc/net/tcp") or ldie "ERROR: reading /proc/net/tcp.";
|
|
while (<TCP>) {
|
|
next if /^ *sl/;
|
|
my ($sl, $local_address, $rem_address, $st, $tx_rx, $tr_tm,
|
|
$retrnsmt, $uid, $timeout, $inode, $jf, $sk) = split;
|
|
$sk =~ s/^0x//;
|
|
$tcp{$sk}{laddr} = $local_address;
|
|
$tcp{$sk}{raddr} = $rem_address;
|
|
$tcp{$sk}{state} = $st;
|
|
}
|
|
close TCP;
|
|
}
|
|
|
|
my @tcpstate;
|
|
sub map_tcp_states {
|
|
push @tcpstate, "NULL";
|
|
for (<DATA>) {
|
|
chomp;
|
|
s/.*TCP_//;
|
|
s/[, ].*$//;
|
|
push @tcpstate, $_;
|
|
}
|
|
}
|
|
|
|
# /proc/net/tcp hex addr to dotted quad decimal
|
|
sub inet_h2a {
|
|
my ($haddr) = @_;
|
|
|
|
my @addr = ();
|
|
for my $num ($haddr =~ /(..)(..)(..)(..)/) {
|
|
unshift @addr, hex($num);
|
|
}
|
|
return join(".", @addr);
|
|
}
|
|
|
|
### check permissions
|
|
chdir "$tracing" or die "ERROR: accessing tracing. Root? Kernel has FTRACE?" .
|
|
"\ndebugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)";
|
|
|
|
### ftrace lock
|
|
if (-e $flock) {
|
|
open FLOCK, $flock; my $fpid = <FLOCK>; chomp $fpid; close FLOCK;
|
|
die "ERROR: ftrace may be in use by PID $fpid ($flock)";
|
|
}
|
|
writeto "$$", $flock or die "ERROR: unable to write $flock.";
|
|
|
|
#
|
|
# Setup and begin tracing.
|
|
# Use of ldie() and edie() ensures that if an error is encountered, the
|
|
# kernel is not left in a partially configured state.
|
|
#
|
|
writeto "nop", "current_tracer" or ldie "ERROR: disabling current_tracer.";
|
|
my $kname_rtr = "tcpretrans_tcp_retransmit_skb";
|
|
my $kname_tlp = "tcpretrans_tcp_send_loss_probe";
|
|
create_kprobe $kname_rtr, "tcp_retransmit_skb sk=%di" or
|
|
ldie "ERROR: creating kprobe for tcp_retransmit_skb().";;
|
|
if ($tlp) {
|
|
create_kprobe $kname_tlp, "tcp_send_loss_probe sk=%di" or
|
|
edie "ERROR: creating kprobe for tcp_send_loss_probe(). " .
|
|
"Older kernel version?";
|
|
}
|
|
if ($stacks) {
|
|
writeto "1", "options/stacktrace" or print STDERR "WARNING: " .
|
|
"unable to enable stacktraces.\n";
|
|
}
|
|
enable_kprobe $kname_rtr or edie "ERROR: enabling $kname_rtr probe.";
|
|
if ($tlp) {
|
|
enable_kprobe $kname_tlp or edie "ERROR: enabling $kname_tlp probe.";
|
|
}
|
|
map_tcp_states();
|
|
printf "%-8s %-6s %-20s -- %-20s %-12s\n", "TIME", "PID", "LADDR:LPORT",
|
|
"RADDR:RPORT", "STATE";
|
|
|
|
#
|
|
# Read and print event data. This loop waits one second then reads the buffered
|
|
# trace data, then caches /proc/net/tcp, then iterates over the buffered trace
|
|
# data using the cached state. While this minimizes CPU overheads, it only
|
|
# works because sockets that are retransmitting are usually long lived, and
|
|
# remain in /proc/net/tcp for at least our sleep interval.
|
|
#
|
|
while (1) {
|
|
sleep $interval;
|
|
|
|
# buffer trace data
|
|
open TPIPE, "trace" or edie "ERROR: opening trace_pipe.";
|
|
my @trace = ();
|
|
while (<TPIPE>) {
|
|
next if /^#/;
|
|
push @trace, $_;
|
|
}
|
|
close TPIPE;
|
|
writeto "0", "trace" or edie "ERROR: clearing trace";
|
|
|
|
# cache /proc/net/tcp state
|
|
if (scalar @trace) {
|
|
cache_tcp();
|
|
}
|
|
|
|
# process and print events
|
|
for (@trace) {
|
|
if ($stacks && /^ *=>/) {
|
|
print $_;
|
|
next;
|
|
}
|
|
|
|
my ($taskpid, $rest) = split ' ', $_, 2;
|
|
my ($task, $pid) = $taskpid =~ /(.*)-(\d+)/;
|
|
|
|
my ($skp) = $rest =~ /sk=([0-9a-fx]*)/;
|
|
next unless defined $skp and $skp ne "";
|
|
$skp =~ s/^0x//;
|
|
|
|
my ($laddr, $lport, $raddr, $rport, $state);
|
|
if (defined $tcp{$skp}) {
|
|
# convert /proc/net/tcp hex to dotted quads
|
|
my ($hladdr, $hlport) = split /:/, $tcp{$skp}{laddr};
|
|
my ($hraddr, $hrport) = split /:/, $tcp{$skp}{raddr};
|
|
$laddr = inet_h2a($hladdr);
|
|
$raddr = inet_h2a($hraddr);
|
|
$lport = hex($hlport);
|
|
$rport = hex($hrport);
|
|
$state = $tcpstate[hex($tcp{$skp}{state})];
|
|
} else {
|
|
# socket closed too quickly
|
|
($laddr, $raddr) = ("-", "-");
|
|
($lport, $rport) = ("-", "-");
|
|
$state = "-";
|
|
}
|
|
|
|
my $now = strftime "%H:%M:%S", localtime;
|
|
printf "%-8s %-6s %-20s %s> %-20s %-12s\n", $now, $pid,
|
|
"$laddr:$lport", $rest =~ /$kname_tlp/ ? "L" : "R",
|
|
"$raddr:$rport", $state,
|
|
}
|
|
}
|
|
|
|
### end tracing
|
|
cleanup();
|
|
|
|
sub cleanup {
|
|
print "\nEnding tracing...\n";
|
|
close TPIPE;
|
|
if ($stacks) {
|
|
writeto "0", "options/stacktrace" or print STDERR "WARNING: " .
|
|
"unable to disable stacktraces.\n";
|
|
}
|
|
remove_kprobe $kname_rtr
|
|
or print STDERR "ERROR: removing kprobe $kname_rtr\n";
|
|
if ($tlp) {
|
|
remove_kprobe $kname_tlp
|
|
or print STDERR "ERROR: removing kprobe $kname_tlp\n";
|
|
}
|
|
writeto "", "trace";
|
|
unlink $flock;
|
|
exit;
|
|
}
|
|
|
|
# from /usr/include/netinet/tcp.h:
|
|
__DATA__
|
|
TCP_ESTABLISHED = 1,
|
|
TCP_SYN_SENT,
|
|
TCP_SYN_RECV,
|
|
TCP_FIN_WAIT1,
|
|
TCP_FIN_WAIT2,
|
|
TCP_TIME_WAIT,
|
|
TCP_CLOSE,
|
|
TCP_CLOSE_WAIT,
|
|
TCP_LAST_ACK,
|
|
TCP_LISTEN,
|
|
TCP_CLOSING /* now a valid state */
|