From bb6047171d2168c83a32a476bdaf942fabeee361 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Sun, 5 Nov 2017 17:36:53 -0700 Subject: [PATCH] netfilter: nf_defrag_ipv4: Add sysctl to disable per interface Add a sysctl nf_ipv4_defrag_skip to skip defragmentation per interface. This is set 0 to preserve existing behavior (always defrag per interface). This is useful for pure ipv4 forwarding scenarios (without NAT) in conjunction with xfrm. It appears that network stack defrags the packets and then forwards them to xfrm which then encrypts and then later fragments them on a different boundary compared to the source. CRs-Fixed: 2140310 Change-Id: I11956284a9692579274e8626f61cc6432232254c Signed-off-by: Subash Abhinov Kasiviswanathan --- Documentation/networking/ip-sysctl.txt | 4 ++++ include/linux/inetdevice.h | 2 ++ include/uapi/linux/ip.h | 1 + include/uapi/linux/sysctl.h | 1 + kernel/sysctl_binary.c | 1 + net/ipv4/devinet.c | 2 ++ net/ipv4/netfilter/nf_defrag_ipv4.c | 10 ++++++++-- 7 files changed, 19 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index b0930d4b099b..b90a095ed2a5 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1306,6 +1306,10 @@ igmp_link_local_mcast_reports - BOOLEAN 224.0.0.X range. Default TRUE +nf_ipv4_defrag_skip - BOOLEAN + Skip defragmentation per interface if set. + Default : 0 (always defrag) + Alexey Kuznetsov. kuznet@ms2.inr.ac.ru diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 5058f061cb2b..f2d7ea34fd3f 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -130,6 +130,8 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) #define IN_DEV_ARP_ANNOUNCE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_ANNOUNCE) #define IN_DEV_ARP_IGNORE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_IGNORE) #define IN_DEV_ARP_NOTIFY(in_dev) IN_DEV_MAXCONF((in_dev), ARP_NOTIFY) +#define IN_DEV_NF_IPV4_DEFRAG_SKIP(in_dev) \ + IN_DEV_ORCONF((in_dev), NF_IPV4_DEFRAG_SKIP) struct in_ifaddr { struct hlist_node hash; diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index b24a742beae5..2e3d1a38d186 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -168,6 +168,7 @@ enum IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, IPV4_DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, IPV4_DEVCONF_DROP_GRATUITOUS_ARP, + IPV4_DEVCONF_NF_IPV4_DEFRAG_SKIP, __IPV4_DEVCONF_MAX }; diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 1aaea7b9e3c5..9b220b655f0f 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -483,6 +483,7 @@ enum NET_IPV4_CONF_PROMOTE_SECONDARIES=20, NET_IPV4_CONF_ARP_ACCEPT=21, NET_IPV4_CONF_ARP_NOTIFY=22, + NET_IPV4_CONF_NF_IPV4_DEFRAG_SKIP = 23, }; /* /proc/sys/net/ipv4/netfilter */ diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 14a370c54f75..d42e95047178 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -257,6 +257,7 @@ static const struct bin_table bin_net_ipv4_conf_vars_table[] = { { CTL_INT, NET_IPV4_CONF_NOPOLICY, "disable_policy" }, { CTL_INT, NET_IPV4_CONF_FORCE_IGMP_VERSION, "force_igmp_version" }, { CTL_INT, NET_IPV4_CONF_PROMOTE_SECONDARIES, "promote_secondaries" }, + { CTL_INT, NET_IPV4_CONF_NF_IPV4_DEFRAG_SKIP, "nf_ipv4_defrag_skip" }, {} }; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index bffa88ecc534..83e54ea9827b 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2280,6 +2280,8 @@ static struct devinet_sysctl_table { "route_localnet"), DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST, "drop_unicast_in_l2_multicast"), + DEVINET_SYSCTL_RW_ENTRY(NF_IPV4_DEFRAG_SKIP, + "nf_ipv4_defrag_skip"), }, }; diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 37fe1616ca0b..9e6f9d27f5e9 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -81,8 +82,13 @@ static unsigned int ipv4_conntrack_defrag(void *priv, #endif /* Gather fragments. */ if (ip_is_fragment(ip_hdr(skb))) { - enum ip_defrag_users user = - nf_ct_defrag_user(state->hook, skb); + enum ip_defrag_users user; + + if (skb->dev && + IN_DEV_NF_IPV4_DEFRAG_SKIP(__in_dev_get_rcu(skb->dev))) + return NF_ACCEPT; + + user = nf_ct_defrag_user(state->hook, skb); if (nf_ct_ipv4_gather_frags(state->net, skb, user)) return NF_STOLEN;