diff mbox series

[v2,net,1/5] icmp: introduce helper for NAT'd source address in network device context

Message ID 20200210141423.173790-2-Jason@zx2c4.com
State Superseded
Delegated to: David Miller
Headers show
Series icmp: account for NAT when sending icmps from ndo layer | expand

Commit Message

Jason A. Donenfeld Feb. 10, 2020, 2:14 p.m. UTC
This introduces a helper function to be called only by network drivers
that wraps calls to icmp[v6]_send in a conntrack transformation, in case
NAT has been used. The transformation happens only on a non-shared skb,
and the skb is fixed back up to its original state after, in case the
calling code continues to use it.

We don't want to pollute the non-driver path, though, so we introduce
this as a helper to be called by places that actually make use of
this, as suggested by Florian.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Florian Westphal <fw@strlen.de>
---
 include/linux/icmpv6.h |  6 ++++++
 include/net/icmp.h     |  6 ++++++
 net/ipv4/icmp.c        | 29 +++++++++++++++++++++++++++++
 net/ipv6/ip6_icmp.c    | 30 ++++++++++++++++++++++++++++++
 4 files changed, 71 insertions(+)

Comments

Jason A. Donenfeld Feb. 10, 2020, 7:48 p.m. UTC | #1
On Mon, Feb 10, 2020 at 3:15 PM Jason A. Donenfeld <Jason@zx2c4.com> wrote:
> +               ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip;
> +       }
> +       icmp_send(skb_in, type, code, info);

According to the comments in icmp_send, access to
ip_hdr(skb_in)->saddr requires first checking for `if
(skb_network_header(skb_in) < skb_in->head ||
(skb_network_header(skb_in) + sizeof(struct iphdr)) >
skb_tail_pointer(skb_in))` first to be safe. So, I'll fix this up for
v3, but will wait some time in case there are additional comments.

Jason
Florian Westphal Feb. 10, 2020, 9:32 p.m. UTC | #2
Jason A. Donenfeld <Jason@zx2c4.com> wrote:
> On Mon, Feb 10, 2020 at 3:15 PM Jason A. Donenfeld <Jason@zx2c4.com> wrote:
> > +               ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip;
> > +       }
> > +       icmp_send(skb_in, type, code, info);
> 
> According to the comments in icmp_send, access to
> ip_hdr(skb_in)->saddr requires first checking for `if
> (skb_network_header(skb_in) < skb_in->head ||
> (skb_network_header(skb_in) + sizeof(struct iphdr)) >
> skb_tail_pointer(skb_in))` first to be safe.

You will probably also need skb_ensure_writable() to handle cloned skbs.

I also suggest to check "ct->status & IPS_NAT_MASK", nat is only done if
those bits are set.
Jason A. Donenfeld Feb. 10, 2020, 9:59 p.m. UTC | #3
Hi Florian,

On Mon, Feb 10, 2020 at 10:33 PM Florian Westphal <fw@strlen.de> wrote:
>
> Jason A. Donenfeld <Jason@zx2c4.com> wrote:
> > On Mon, Feb 10, 2020 at 3:15 PM Jason A. Donenfeld <Jason@zx2c4.com> wrote:
> > > +               ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip;
> > > +       }
> > > +       icmp_send(skb_in, type, code, info);
> >
> > According to the comments in icmp_send, access to
> > ip_hdr(skb_in)->saddr requires first checking for `if
> > (skb_network_header(skb_in) < skb_in->head ||
> > (skb_network_header(skb_in) + sizeof(struct iphdr)) >
> > skb_tail_pointer(skb_in))` first to be safe.
>
> You will probably also need skb_ensure_writable() to handle cloned skbs.
>
> I also suggest to check "ct->status & IPS_NAT_MASK", nat is only done if
> those bits are set.

Thanks for the suggestions. I've made these changes and they're queued
up for a v3, currently staged in wireguard-linux.git's stable branch:
https://git.zx2c4.com/wireguard-linux/log/?h=stable

Jason
Florian Westphal Feb. 10, 2020, 10:26 p.m. UTC | #4
Jason A. Donenfeld <Jason@zx2c4.com> wrote:
> Hi Florian,
> 
> On Mon, Feb 10, 2020 at 10:33 PM Florian Westphal <fw@strlen.de> wrote:
> >
> > Jason A. Donenfeld <Jason@zx2c4.com> wrote:
> > > On Mon, Feb 10, 2020 at 3:15 PM Jason A. Donenfeld <Jason@zx2c4.com> wrote:
> > > > +               ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip;
> > > > +       }
> > > > +       icmp_send(skb_in, type, code, info);
> > >
> > > According to the comments in icmp_send, access to
> > > ip_hdr(skb_in)->saddr requires first checking for `if
> > > (skb_network_header(skb_in) < skb_in->head ||
> > > (skb_network_header(skb_in) + sizeof(struct iphdr)) >
> > > skb_tail_pointer(skb_in))` first to be safe.
> >
> > You will probably also need skb_ensure_writable() to handle cloned skbs.
> >
> > I also suggest to check "ct->status & IPS_NAT_MASK", nat is only done if
> > those bits are set.
> 
> Thanks for the suggestions. I've made these changes and they're queued
> up for a v3, currently staged in wireguard-linux.git's stable branch:
> https://git.zx2c4.com/wireguard-linux/log/?h=stable

I think this is a bit too conservative, f.e. i don't see how
ndo-called skbs could be shared (tx path needs to be able to change skb
list pointers)?

If its needed it looks ok.

Otherwise, I would suggest something like this:

void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
{
   enum ip_conntrack_info ctinfo;
   struct nf_conn *ct;
   __be32 orig_ip;

   ct = nf_ct_get(skb_in, &ctinfo);
   if (!ct || ((ct->status & IPS_NAT_MASK) == 0) {
      icmp_send(skb_in, type, code, info);
      return;
    }

   /* avoid reallocations */
   if (skb_network_header(skb_in) < skb_in->head ||
       (skb_network_header(skb_in) + sizeof(struct iphdr)) >
        skb_tail_pointer(skb_in))
       return;

    /* handle clones. NB: if reallocations are to be avoided, then
     * if (skb_cloned(skb_in) &&
     * !skb_clone_writable(skb_in, skb_network_offset(skb_in) + iphlen))
     *
     * ... should be placed here instead:
     */
    if (unlikely(skb_ensure_writable(skb_in,
                 skb_network_offset(skb_in) + sizeof(struct iphdr))))
        return;

    orig_ip = ip_hdr(skb_in)->saddr;
    ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip;
    icmp_send(skb_in, type, code, info);
    ip_hdr(skb_in)->saddr = orig_ip;
}
Jason A. Donenfeld Feb. 11, 2020, 9:59 a.m. UTC | #5
On Mon, Feb 10, 2020 at 11:33 PM Florian Westphal <fw@strlen.de> wrote:
> I think this is a bit too conservative, f.e. i don't see how
> ndo-called skbs could be shared (tx path needs to be able to change skb
> list pointers)?

Are you *sure* about that? Dave - do you know?

I'm asking with *asterisks* because I see a few drivers in tree that
do call skb_share_check from their ndo_start_xmit function. If this
makes no sense and isn't needed, then I'll send a cleanup series for
these. And, I'll remove it from icmp_ndo_send for v3, replacing it
with a `if (WARN_ON(skb_shared(skb_in))) return;`.

Thanks,
Jason
Jason A. Donenfeld Feb. 11, 2020, 12:25 p.m. UTC | #6
On Mon, Feb 10, 2020 at 10:33 PM Florian Westphal <fw@strlen.de> wrote:
> I also suggest to check "ct->status & IPS_NAT_MASK", nat is only done if
> those bits are set.

We can optimize even further, because we only care about the IPS_SRC_NAT case.
diff mbox series

Patch

diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h
index ef1cbb5f454f..93338fd54af8 100644
--- a/include/linux/icmpv6.h
+++ b/include/linux/icmpv6.h
@@ -31,6 +31,12 @@  static inline void icmpv6_send(struct sk_buff *skb,
 }
 #endif
 
+#if IS_ENABLED(CONFIG_NF_NAT)
+void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info);
+#else
+#define icmpv6_ndo_send icmpv6_send
+#endif
+
 extern int				icmpv6_init(void);
 extern int				icmpv6_err_convert(u8 type, u8 code,
 							   int *err);
diff --git a/include/net/icmp.h b/include/net/icmp.h
index 5d4bfdba9adf..9ac2d2672a93 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -43,6 +43,12 @@  static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32
 	__icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt);
 }
 
+#if IS_ENABLED(CONFIG_NF_NAT)
+void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info);
+#else
+#define icmp_ndo_send icmp_send
+#endif
+
 int icmp_rcv(struct sk_buff *skb);
 int icmp_err(struct sk_buff *skb, u32 info);
 int icmp_init(void);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 18068ed42f25..5ca36181d4f4 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -748,6 +748,35 @@  out:;
 }
 EXPORT_SYMBOL(__icmp_send);
 
+#if IS_ENABLED(CONFIG_NF_NAT)
+#include <net/netfilter/nf_conntrack.h>
+void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
+{
+	struct sk_buff *cloned_skb = NULL;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+	__be32 orig_ip;
+
+	ct = nf_ct_get(skb_in, &ctinfo);
+	if (ct) {
+		if (skb_shared(skb_in)) {
+			skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC);
+			if (unlikely(!skb_in))
+				return;
+		}
+		orig_ip = ip_hdr(skb_in)->saddr;
+		ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip;
+	}
+	icmp_send(skb_in, type, code, info);
+	if (ct) {
+		if (cloned_skb)
+			consume_skb(cloned_skb);
+		else
+			ip_hdr(skb_in)->saddr = orig_ip;
+	}
+}
+EXPORT_SYMBOL(icmp_ndo_send);
+#endif
 
 static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
 {
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
index 02045494c24c..ee364d61b789 100644
--- a/net/ipv6/ip6_icmp.c
+++ b/net/ipv6/ip6_icmp.c
@@ -45,4 +45,34 @@  void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	rcu_read_unlock();
 }
 EXPORT_SYMBOL(icmpv6_send);
+
+#if IS_ENABLED(CONFIG_NF_NAT)
+#include <net/netfilter/nf_conntrack.h>
+void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
+{
+	struct sk_buff *cloned_skb = NULL;
+	enum ip_conntrack_info ctinfo;
+	struct in6_addr orig_ip;
+	struct nf_conn *ct;
+
+	ct = nf_ct_get(skb_in, &ctinfo);
+	if (ct) {
+		if (skb_shared(skb_in)) {
+			skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC);
+			if (unlikely(!skb_in))
+				return;
+		}
+		orig_ip = ipv6_hdr(skb_in)->saddr;
+		ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6;
+	}
+	icmpv6_send(skb_in, type, code, info);
+	if (ct) {
+		if (cloned_skb)
+			consume_skb(cloned_skb);
+		else
+			ipv6_hdr(skb_in)->saddr = orig_ip;
+	}
+}
+EXPORT_SYMBOL(icmpv6_ndo_send);
+#endif
 #endif