diff mbox series

[RFC,bpf-next] bpf: add bpf_get_skb_hash helper function

Message ID 20200810182841.10953-1-harshitha.ramamurthy@intel.com
State RFC
Delegated to: David Miller
Headers show
Series [RFC,bpf-next] bpf: add bpf_get_skb_hash helper function | expand

Commit Message

Harshitha Ramamurthy Aug. 10, 2020, 6:28 p.m. UTC
This patch adds a helper function called bpf_get_skb_hash to calculate
the skb hash for a packet at the XDP layer. In the helper function,
a local skb is allocated and we populate the fields needed in the skb
before calling skb_get_hash. To avoid memory allocations for each packet,
we allocate an skb per CPU and use the same buffer for subsequent hash
calculations on the same CPU.

Signed-off-by: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
---
 include/uapi/linux/bpf.h       |  8 ++++++
 net/core/filter.c              | 50 ++++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  8 ++++++
 3 files changed, 66 insertions(+)

Comments

David Ahern Aug. 10, 2020, 7:02 p.m. UTC | #1
On 8/10/20 12:28 PM, Harshitha Ramamurthy wrote:
> This patch adds a helper function called bpf_get_skb_hash to calculate
> the skb hash for a packet at the XDP layer. In the helper function,

Why? i.e., expected use case?

Pulling this from hardware when possible is better. e.g., Saeed's
hardware hints proposal includes it.

> a local skb is allocated and we populate the fields needed in the skb
> before calling skb_get_hash. To avoid memory allocations for each packet,
> we allocate an skb per CPU and use the same buffer for subsequent hash
> calculations on the same CPU.
> 
> Signed-off-by: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
> ---
>  include/uapi/linux/bpf.h       |  8 ++++++
>  net/core/filter.c              | 50 ++++++++++++++++++++++++++++++++++
>  tools/include/uapi/linux/bpf.h |  8 ++++++
>  3 files changed, 66 insertions(+)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index b134e679e9db..25aa850c8a40 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3394,6 +3394,13 @@ union bpf_attr {
>   *		A non-negative value equal to or less than *size* on success,
>   *		or a negative error in case of failure.
>   *
> + * u32 bpf_get_skb_hash(struct xdp_buff *xdp_md)
> + *	Description
> + *		Return the skb hash for the xdp context passed. This function
> + *		allocates a temporary skb and populates the fields needed. It
> + *		then calls skb_get_hash to calculate the skb hash for the packet.
> + *	Return
> + *		The 32-bit hash.
>   */
>  #define __BPF_FUNC_MAPPER(FN)		\
>  	FN(unspec),			\
> @@ -3538,6 +3545,7 @@ union bpf_attr {
>  	FN(skc_to_tcp_request_sock),	\
>  	FN(skc_to_udp6_sock),		\
>  	FN(get_task_stack),		\
> +	FN(get_skb_hash),		\
>  	/* */
>  
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 7124f0fe6974..9f6ad7209b44 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -3765,6 +3765,54 @@ static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
>  	.arg3_type      = ARG_ANYTHING,
>  };
>  
> +static DEFINE_PER_CPU(struct sk_buff *, hash_skb);
> +
> +BPF_CALL_1(bpf_get_skb_hash, struct xdp_buff *, xdp)
> +{
> +	void *data_end = xdp->data_end;
> +	struct ethhdr *eth = xdp->data;
> +	void *data = xdp->data;
> +	unsigned long flags;
> +	struct sk_buff *skb;
> +	int nh_off, len;
> +	u32 ret = 0;
> +
> +	/* disable interrupts to get the correct skb pointer */
> +	local_irq_save(flags);
> +
> +	len = data_end - data;
> +	skb = this_cpu_read(hash_skb);
> +	if (!skb) {
> +		skb = alloc_skb(len, GFP_ATOMIC);
> +		if (!skb)
> +			goto out;
> +		this_cpu_write(hash_skb, skb);
> +	}
> +
> +	nh_off = sizeof(*eth);

vlans?

> +	if (data + nh_off > data_end)
> +		goto out;
> +
> +	skb->data = data;
> +	skb->head = data;
> +	skb->network_header = nh_off;
> +	skb->protocol = eth->h_proto;
> +	skb->len = len;
> +	skb->dev = xdp->rxq->dev;
> +
> +	ret = skb_get_hash(skb);

static inline __u32 skb_get_hash(struct sk_buff *skb)
{
        if (!skb->l4_hash && !skb->sw_hash)
                __skb_get_hash(skb);

        return skb->hash;
}

__skb_get_hash -> __skb_set_sw_hash -> __skb_set_hash which sets
sw_hash as a minimum, so it seems to me you will always be returning the
hash of the first packet since you do not clear relevant fields of the skb.
Harshitha Ramamurthy Aug. 12, 2020, 8:18 p.m. UTC | #2
On Mon, 2020-08-10 at 13:02 -0600, David Ahern wrote:
> On 8/10/20 12:28 PM, Harshitha Ramamurthy wrote:
> > This patch adds a helper function called bpf_get_skb_hash to
> > calculate
> > the skb hash for a packet at the XDP layer. In the helper function,
> 
> Why? i.e., expected use case?
> 
> Pulling this from hardware when possible is better. e.g., Saeed's
> hardware hints proposal includes it.

Thanks for the review and comments.

So, when possible, it might be better to pull the HW hash but not all
HW provides it so this function would be useful in those cases. Also,
this is a precursor to potentially calling the in-kernel flow dissector
from a helper function.
> 
> > a local skb is allocated and we populate the fields needed in the
> > skb
> > before calling skb_get_hash. To avoid memory allocations for each
> > packet,
> > we allocate an skb per CPU and use the same buffer for subsequent
> > hash
> > calculations on the same CPU.
> > 
> > Signed-off-by: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com
> > >
> > ---
> >  include/uapi/linux/bpf.h       |  8 ++++++
> >  net/core/filter.c              | 50
> > ++++++++++++++++++++++++++++++++++
> >  tools/include/uapi/linux/bpf.h |  8 ++++++
> >  3 files changed, 66 insertions(+)
> > 
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index b134e679e9db..25aa850c8a40 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -3394,6 +3394,13 @@ union bpf_attr {
> >   *		A non-negative value equal to or less than *size* on
> > success,
> >   *		or a negative error in case of failure.
> >   *
> > + * u32 bpf_get_skb_hash(struct xdp_buff *xdp_md)
> > + *	Description
> > + *		Return the skb hash for the xdp context passed. This
> > function
> > + *		allocates a temporary skb and populates the fields
> > needed. It
> > + *		then calls skb_get_hash to calculate the skb hash for
> > the packet.
> > + *	Return
> > + *		The 32-bit hash.
> >   */
> >  #define __BPF_FUNC_MAPPER(FN)		\
> >  	FN(unspec),			\
> > @@ -3538,6 +3545,7 @@ union bpf_attr {
> >  	FN(skc_to_tcp_request_sock),	\
> >  	FN(skc_to_udp6_sock),		\
> >  	FN(get_task_stack),		\
> > +	FN(get_skb_hash),		\
> >  	/* */
> >  
> >  /* integer value in 'imm' field of BPF_CALL instruction selects
> > which helper
> > diff --git a/net/core/filter.c b/net/core/filter.c
> > index 7124f0fe6974..9f6ad7209b44 100644
> > --- a/net/core/filter.c
> > +++ b/net/core/filter.c
> > @@ -3765,6 +3765,54 @@ static const struct bpf_func_proto
> > bpf_xdp_redirect_map_proto = {
> >  	.arg3_type      = ARG_ANYTHING,
> >  };
> >  
> > +static DEFINE_PER_CPU(struct sk_buff *, hash_skb);
> > +
> > +BPF_CALL_1(bpf_get_skb_hash, struct xdp_buff *, xdp)
> > +{
> > +	void *data_end = xdp->data_end;
> > +	struct ethhdr *eth = xdp->data;
> > +	void *data = xdp->data;
> > +	unsigned long flags;
> > +	struct sk_buff *skb;
> > +	int nh_off, len;
> > +	u32 ret = 0;
> > +
> > +	/* disable interrupts to get the correct skb pointer */
> > +	local_irq_save(flags);
> > +
> > +	len = data_end - data;
> > +	skb = this_cpu_read(hash_skb);
> > +	if (!skb) {
> > +		skb = alloc_skb(len, GFP_ATOMIC);
> > +		if (!skb)
> > +			goto out;
> > +		this_cpu_write(hash_skb, skb);
> > +	}
> > +
> > +	nh_off = sizeof(*eth);
> 
> vlans?

Yes, need to factor for vlans. Will fix it.
> 
> > +	if (data + nh_off > data_end)
> > +		goto out;
> > +
> > +	skb->data = data;
> > +	skb->head = data;
> > +	skb->network_header = nh_off;
> > +	skb->protocol = eth->h_proto;
> > +	skb->len = len;
> > +	skb->dev = xdp->rxq->dev;
> > +
> > +	ret = skb_get_hash(skb);
> 
> static inline __u32 skb_get_hash(struct sk_buff *skb)
> {
>         if (!skb->l4_hash && !skb->sw_hash)
>                 __skb_get_hash(skb);
> 
>         return skb->hash;
> }
> 
> __skb_get_hash -> __skb_set_sw_hash -> __skb_set_hash which sets
> sw_hash as a minimum, so it seems to me you will always be returning
> the
> hash of the first packet since you do not clear relevant fields of
> the skb.

yes, that's true. Calling skb_clear_hash first should fix this. I will
try it out.

Thanks,
Harshitha.
David Ahern Aug. 12, 2020, 9:13 p.m. UTC | #3
On 8/11/20 3:55 PM, Ramamurthy, Harshitha wrote:
> is a precursor to potentially calling the in-kernel flow dissector from
> a helper function.
> 

That is going to be a challenge to do in a way that does not impact the
kernel's ability to change as needed, and I suspect it will be much
slower than doing it in ebpf. Modular ebpf code that can be adapted to
use cases is going to be more appropriate for XDP for example.
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b134e679e9db..25aa850c8a40 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3394,6 +3394,13 @@  union bpf_attr {
  *		A non-negative value equal to or less than *size* on success,
  *		or a negative error in case of failure.
  *
+ * u32 bpf_get_skb_hash(struct xdp_buff *xdp_md)
+ *	Description
+ *		Return the skb hash for the xdp context passed. This function
+ *		allocates a temporary skb and populates the fields needed. It
+ *		then calls skb_get_hash to calculate the skb hash for the packet.
+ *	Return
+ *		The 32-bit hash.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3538,6 +3545,7 @@  union bpf_attr {
 	FN(skc_to_tcp_request_sock),	\
 	FN(skc_to_udp6_sock),		\
 	FN(get_task_stack),		\
+	FN(get_skb_hash),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/net/core/filter.c b/net/core/filter.c
index 7124f0fe6974..9f6ad7209b44 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3765,6 +3765,54 @@  static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
 	.arg3_type      = ARG_ANYTHING,
 };
 
+static DEFINE_PER_CPU(struct sk_buff *, hash_skb);
+
+BPF_CALL_1(bpf_get_skb_hash, struct xdp_buff *, xdp)
+{
+	void *data_end = xdp->data_end;
+	struct ethhdr *eth = xdp->data;
+	void *data = xdp->data;
+	unsigned long flags;
+	struct sk_buff *skb;
+	int nh_off, len;
+	u32 ret = 0;
+
+	/* disable interrupts to get the correct skb pointer */
+	local_irq_save(flags);
+
+	len = data_end - data;
+	skb = this_cpu_read(hash_skb);
+	if (!skb) {
+		skb = alloc_skb(len, GFP_ATOMIC);
+		if (!skb)
+			goto out;
+		this_cpu_write(hash_skb, skb);
+	}
+
+	nh_off = sizeof(*eth);
+	if (data + nh_off > data_end)
+		goto out;
+
+	skb->data = data;
+	skb->head = data;
+	skb->network_header = nh_off;
+	skb->protocol = eth->h_proto;
+	skb->len = len;
+	skb->dev = xdp->rxq->dev;
+
+	ret = skb_get_hash(skb);
+out:
+	local_irq_restore(flags);
+	return ret;
+}
+
+const struct bpf_func_proto bpf_get_skb_hash_proto = {
+	.func		= bpf_get_skb_hash,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+};
+
 static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
 				  unsigned long off, unsigned long len)
 {
@@ -6503,6 +6551,8 @@  xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_xdp_adjust_tail_proto;
 	case BPF_FUNC_fib_lookup:
 		return &bpf_xdp_fib_lookup_proto;
+	case BPF_FUNC_get_skb_hash:
+		return &bpf_get_skb_hash_proto;
 #ifdef CONFIG_INET
 	case BPF_FUNC_sk_lookup_udp:
 		return &bpf_xdp_sk_lookup_udp_proto;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b134e679e9db..25aa850c8a40 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3394,6 +3394,13 @@  union bpf_attr {
  *		A non-negative value equal to or less than *size* on success,
  *		or a negative error in case of failure.
  *
+ * u32 bpf_get_skb_hash(struct xdp_buff *xdp_md)
+ *	Description
+ *		Return the skb hash for the xdp context passed. This function
+ *		allocates a temporary skb and populates the fields needed. It
+ *		then calls skb_get_hash to calculate the skb hash for the packet.
+ *	Return
+ *		The 32-bit hash.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3538,6 +3545,7 @@  union bpf_attr {
 	FN(skc_to_tcp_request_sock),	\
 	FN(skc_to_udp6_sock),		\
 	FN(get_task_stack),		\
+	FN(get_skb_hash),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper