mirror of
https://github.com/torvalds/linux.git
synced 2025-11-30 23:16:01 +07:00
Convert the ->flowic_tos field of struct flowi_common from __u8 to dscp_t, rename it ->flowic_dscp and propagate these changes to struct flowi and struct flowi4. We've had several bugs in the past where ECN bits could interfere with IPv4 routing, because these bits were not properly cleared when setting ->flowi4_tos. These bugs should be fixed now and the dscp_t type has been introduced to ensure that variables carrying DSCP values don't accidentally have any ECN bits set. Several variables and structure fields have been converted to dscp_t already, but the main IPv4 routing structure, struct flowi4, is still using a __u8. To avoid any future regression, this patch converts it to dscp_t. There are many users to convert at once. Fortunately, around half of ->flowi4_tos users already have a dscp_t value at hand, which they currently convert to __u8 using inet_dscp_to_dsfield(). For all of these users, we just need to drop that conversion. But, although we try to do the __u8 <-> dscp_t conversions at the boundaries of the network or of user space, some places still store TOS/DSCP variables as __u8 in core networking code. Those can hardly be converted either because the data structure is part of UAPI or because the same variable or field is also used for handling ECN in other parts of the code. In all of these cases where we don't have a dscp_t variable at hand, we need to use inet_dsfield_to_dscp() when interacting with ->flowi4_dscp. Changes since v1: * Fix space alignment in __bpf_redirect_neigh_v4() (Ido). Signed-off-by: Guillaume Nault <gnault@redhat.com> Reviewed-by: Ido Schimmel <idosch@nvidia.com> Link: https://patch.msgid.link/29acecb45e911d17446b9a3dbdb1ab7b821ea371.1756128932.git.gnault@redhat.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
100 lines
2.7 KiB
C
100 lines
2.7 KiB
C
/*
|
|
* IPv4 specific functions of netfilter core
|
|
*
|
|
* Rusty Russell (C) 2000 -- This code is GPL.
|
|
* Patrick McHardy (C) 2006-2012
|
|
*/
|
|
#include <linux/kernel.h>
|
|
#include <linux/netfilter.h>
|
|
#include <linux/netfilter_ipv4.h>
|
|
#include <linux/ip.h>
|
|
#include <linux/skbuff.h>
|
|
#include <linux/gfp.h>
|
|
#include <linux/export.h>
|
|
#include <net/flow.h>
|
|
#include <net/route.h>
|
|
#include <net/xfrm.h>
|
|
#include <net/ip.h>
|
|
#include <net/netfilter/nf_queue.h>
|
|
|
|
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
|
|
int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int addr_type)
|
|
{
|
|
struct net_device *dev = skb_dst_dev(skb);
|
|
const struct iphdr *iph = ip_hdr(skb);
|
|
struct rtable *rt;
|
|
struct flowi4 fl4 = {};
|
|
__be32 saddr = iph->saddr;
|
|
__u8 flags;
|
|
struct flow_keys flkeys;
|
|
unsigned int hh_len;
|
|
|
|
sk = sk_to_full_sk(sk);
|
|
flags = sk ? inet_sk_flowi_flags(sk) : 0;
|
|
|
|
if (addr_type == RTN_UNSPEC)
|
|
addr_type = inet_addr_type_dev_table(net, dev, saddr);
|
|
if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST)
|
|
flags |= FLOWI_FLAG_ANYSRC;
|
|
else
|
|
saddr = 0;
|
|
|
|
/* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
|
|
* packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook.
|
|
*/
|
|
fl4.daddr = iph->daddr;
|
|
fl4.saddr = saddr;
|
|
fl4.flowi4_dscp = ip4h_dscp(iph);
|
|
fl4.flowi4_oif = sk ? sk->sk_bound_dev_if : 0;
|
|
fl4.flowi4_l3mdev = l3mdev_master_ifindex(dev);
|
|
fl4.flowi4_mark = skb->mark;
|
|
fl4.flowi4_flags = flags;
|
|
fib4_rules_early_flow_dissect(net, skb, &fl4, &flkeys);
|
|
rt = ip_route_output_key(net, &fl4);
|
|
if (IS_ERR(rt))
|
|
return PTR_ERR(rt);
|
|
|
|
/* Drop old route. */
|
|
skb_dst_drop(skb);
|
|
skb_dst_set(skb, &rt->dst);
|
|
|
|
if (skb_dst(skb)->error)
|
|
return skb_dst(skb)->error;
|
|
|
|
#ifdef CONFIG_XFRM
|
|
if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
|
|
xfrm_decode_session(net, skb, flowi4_to_flowi(&fl4), AF_INET) == 0) {
|
|
struct dst_entry *dst = skb_dst(skb);
|
|
/* ignore return value from skb_dstref_steal, xfrm_lookup takes
|
|
* care of dropping the refcnt if needed.
|
|
*/
|
|
skb_dstref_steal(skb);
|
|
dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), sk, 0);
|
|
if (IS_ERR(dst))
|
|
return PTR_ERR(dst);
|
|
skb_dst_set(skb, dst);
|
|
}
|
|
#endif
|
|
|
|
/* Change in oif may mean change in hh_len. */
|
|
hh_len = skb_dst_dev(skb)->hard_header_len;
|
|
if (skb_headroom(skb) < hh_len &&
|
|
pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
|
|
0, GFP_ATOMIC))
|
|
return -ENOMEM;
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(ip_route_me_harder);
|
|
|
|
int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
|
|
bool strict __always_unused)
|
|
{
|
|
struct rtable *rt = ip_route_output_key(net, &fl->u.ip4);
|
|
if (IS_ERR(rt))
|
|
return PTR_ERR(rt);
|
|
*dst = &rt->dst;
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(nf_ip_route);
|