diff mbox series

[bpf-next] bpf: add bpf_get_xdp_hash helper function

Message ID 20200831192506.28896-1-harshitha.ramamurthy@intel.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series [bpf-next] bpf: add bpf_get_xdp_hash helper function | expand

Commit Message

Harshitha Ramamurthy Aug. 31, 2020, 7:25 p.m. UTC
This patch adds a helper function called bpf_get_xdp_hash to calculate
the hash for a packet at the XDP layer. In the helper function, we call
the kernel flow dissector in non-skb mode by passing the net pointer
to calculate the hash.

Changes since RFC:
- accounted for vlans(David Ahern)
- return the correct hash by not using skb_get_hash(David Ahern)
- call __skb_flow_dissect in non-skb mode

Signed-off-by: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
---
 include/uapi/linux/bpf.h       |  9 +++++++++
 net/core/filter.c              | 29 +++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  9 +++++++++
 3 files changed, 47 insertions(+)

Comments

David Ahern Aug. 31, 2020, 7:54 p.m. UTC | #1
On 8/31/20 1:25 PM, Harshitha Ramamurthy wrote:
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index a613750d5515..bffe93b526e7 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3576,6 +3576,14 @@ union bpf_attr {
>   * 		the data in *dst*. This is a wrapper of copy_from_user().
>   * 	Return
>   * 		0 on success, or a negative error in case of failure.
> + *
> + * u32 bpf_get_xdp_hash(struct xdp_buff *xdp_md)

I thought there was a change recently making the uapi reference xdp_md;
xdp_buff is not exported as part of the uapi.


> + *	Description
> + *		Return the hash for the xdp context passed. This function
> + *		calls skb_flow_dissect in non-skb mode to calculate the
> + *		hash for the packet.
> + *	Return
> + *		The 32-bit hash.
>   */
>  #define __BPF_FUNC_MAPPER(FN)		\
>  	FN(unspec),			\
> @@ -3727,6 +3735,7 @@ union bpf_attr {
>  	FN(inode_storage_delete),	\
>  	FN(d_path),			\
>  	FN(copy_from_user),		\
> +	FN(get_xdp_hash),		\
>  	/* */
>  
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 47eef9a0be6a..cfb5a6aea6c3 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -3765,6 +3765,33 @@ static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
>  	.arg3_type      = ARG_ANYTHING,
>  };
>  
> +BPF_CALL_1(bpf_get_xdp_hash, struct xdp_buff *, xdp)
> +{
> +	void *data_end = xdp->data_end;
> +	struct ethhdr *eth = xdp->data;
> +	void *data = xdp->data;
> +	struct flow_keys keys;
> +	u32 ret = 0;
> +	int len;
> +
> +	len = data_end - data;
> +	if (len <= 0)
> +		return ret;

you should verify len covers the ethernet header. Looking at
__skb_flow_dissect use of hlen presumes it exists.

> +	memset(&keys, 0, sizeof(keys));
> +	__skb_flow_dissect(dev_net(xdp->rxq->dev), NULL, &flow_keys_dissector,
> +			   &keys, data, eth->h_proto, sizeof(*eth), len,
> +			   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);

By STOP_AT_FLOW_LABEL I take it you want this to be an L3 hash. Why not
add a flags argument to the helper and let the hash be L3 or L4?


you should add test cases and have them cover the permutations - e.g.,
vlan, Q-in-Q, ipv4, ipv6, non-IP packet for L3 hash and then udp, tcp
for L4 hash.
Daniel Borkmann Aug. 31, 2020, 8:33 p.m. UTC | #2
On 8/31/20 9:25 PM, Harshitha Ramamurthy wrote:
> This patch adds a helper function called bpf_get_xdp_hash to calculate
> the hash for a packet at the XDP layer. In the helper function, we call
> the kernel flow dissector in non-skb mode by passing the net pointer
> to calculate the hash.

So this commit msg says 'what' the patch does, but says nothing about 'why' it is
needed especially given there's the 1 mio insn limit in place where it should be
easy to write that up in BPF anyway. The commit msg needs to have a clear rationale
which describes the motivation behind this helper.. why it cannot be done in BPF
itself?

> Changes since RFC:
> - accounted for vlans(David Ahern)
> - return the correct hash by not using skb_get_hash(David Ahern)
> - call __skb_flow_dissect in non-skb mode
>
Harshitha Ramamurthy Sept. 2, 2020, 12:52 a.m. UTC | #3
> From: bpf-owner@vger.kernel.org <bpf-owner@vger.kernel.org> On Behalf
> Of David Ahern
> Sent: Monday, August 31, 2020 12:54 PM
> To: Ramamurthy, Harshitha <harshitha.ramamurthy@intel.com>;
> bpf@vger.kernel.org; netdev@vger.kernel.org; ast@kernel.org;
> daniel@iogearbox.net; davem@davemloft.net; kuba@kernel.org
> Cc: Duyck, Alexander H <alexander.h.duyck@intel.com>; Herbert, Tom
> <tom.herbert@intel.com>
> Subject: Re: [PATCH bpf-next] bpf: add bpf_get_xdp_hash helper function
> 
> On 8/31/20 1:25 PM, Harshitha Ramamurthy wrote:
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index
> > a613750d5515..bffe93b526e7 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -3576,6 +3576,14 @@ union bpf_attr {
> >   * 		the data in *dst*. This is a wrapper of copy_from_user().
> >   * 	Return
> >   * 		0 on success, or a negative error in case of failure.
> > + *
> > + * u32 bpf_get_xdp_hash(struct xdp_buff *xdp_md)
> 
> I thought there was a change recently making the uapi reference xdp_md;
> xdp_buff is not exported as part of the uapi.

Not sure what you mean - other xdp related helper functions still use xdp_buff as an argument. Could you point me to an example of what you are referring to?

> 
> 
> > + *	Description
> > + *		Return the hash for the xdp context passed. This function
> > + *		calls skb_flow_dissect in non-skb mode to calculate the
> > + *		hash for the packet.
> > + *	Return
> > + *		The 32-bit hash.
> >   */
> >  #define __BPF_FUNC_MAPPER(FN)		\
> >  	FN(unspec),			\
> > @@ -3727,6 +3735,7 @@ union bpf_attr {
> >  	FN(inode_storage_delete),	\
> >  	FN(d_path),			\
> >  	FN(copy_from_user),		\
> > +	FN(get_xdp_hash),		\
> >  	/* */
> >
> >  /* integer value in 'imm' field of BPF_CALL instruction selects which
> > helper diff --git a/net/core/filter.c b/net/core/filter.c index
> > 47eef9a0be6a..cfb5a6aea6c3 100644
> > --- a/net/core/filter.c
> > +++ b/net/core/filter.c
> > @@ -3765,6 +3765,33 @@ static const struct bpf_func_proto
> bpf_xdp_redirect_map_proto = {
> >  	.arg3_type      = ARG_ANYTHING,
> >  };
> >
> > +BPF_CALL_1(bpf_get_xdp_hash, struct xdp_buff *, xdp) {
> > +	void *data_end = xdp->data_end;
> > +	struct ethhdr *eth = xdp->data;
> > +	void *data = xdp->data;
> > +	struct flow_keys keys;
> > +	u32 ret = 0;
> > +	int len;
> > +
> > +	len = data_end - data;
> > +	if (len <= 0)
> > +		return ret;
> 
> you should verify len covers the ethernet header. Looking at
> __skb_flow_dissect use of hlen presumes it exists.

Yes,  I will make sure to return if len < sizeof(struct ethhdr)

> 
> > +	memset(&keys, 0, sizeof(keys));
> > +	__skb_flow_dissect(dev_net(xdp->rxq->dev), NULL,
> &flow_keys_dissector,
> > +			   &keys, data, eth->h_proto, sizeof(*eth), len,
> > +			   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
> 
> By STOP_AT_FLOW_LABEL I take it you want this to be an L3 hash. Why not
> add a flags argument to the helper and let the hash be L3 or L4?

I wrote this exactly how skb_get_hash calls skb_flow_dissect - with the same flag STOP_AT_FLOW_LABEL.  So it should already cover L3 and L4 hash, right? From what I understand STOP_AT_FLOW_LABEL flag is used to only stop parsing when a flow label is seen in ipv6 packets. 

> 
> 
> you should add test cases and have them cover the permutations - e.g., vlan,
> Q-in-Q, ipv4, ipv6, non-IP packet for L3 hash and then udp, tcp for L4 hash.

Sure, I will add test cases using this helper function.

Thanks for the feedback!
Harshitha
Harshitha Ramamurthy Sept. 2, 2020, 12:54 a.m. UTC | #4
> From: Daniel Borkmann <daniel@iogearbox.net>
> Sent: Monday, August 31, 2020 1:33 PM
> To: Ramamurthy, Harshitha <harshitha.ramamurthy@intel.com>;
> bpf@vger.kernel.org; netdev@vger.kernel.org; ast@kernel.org;
> davem@davemloft.net; kuba@kernel.org
> Cc: dsahern@gmail.com; Duyck, Alexander H
> <alexander.h.duyck@intel.com>; Herbert, Tom <tom.herbert@intel.com>
> Subject: Re: [PATCH bpf-next] bpf: add bpf_get_xdp_hash helper function
> 
> On 8/31/20 9:25 PM, Harshitha Ramamurthy wrote:
> > This patch adds a helper function called bpf_get_xdp_hash to calculate
> > the hash for a packet at the XDP layer. In the helper function, we
> > call the kernel flow dissector in non-skb mode by passing the net
> > pointer to calculate the hash.
> 
> So this commit msg says 'what' the patch does, but says nothing about 'why'
> it is needed especially given there's the 1 mio insn limit in place where it
> should be easy to write that up in BPF anyway. The commit msg needs to
> have a clear rationale which describes the motivation behind this helper..
> why it cannot be done in BPF itself?

Okay, will add a rationale in the commit message in the next version for use-case. 

Thanks,
Harshitha
> 
> > Changes since RFC:
> > - accounted for vlans(David Ahern)
> > - return the correct hash by not using skb_get_hash(David Ahern)
> > - call __skb_flow_dissect in non-skb mode
> >
David Ahern Sept. 2, 2020, 2:09 a.m. UTC | #5
On 9/1/20 6:52 PM, Ramamurthy, Harshitha wrote:
>> On 8/31/20 1:25 PM, Harshitha Ramamurthy wrote:
>>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index
>>> a613750d5515..bffe93b526e7 100644
>>> --- a/include/uapi/linux/bpf.h
>>> +++ b/include/uapi/linux/bpf.h
>>> @@ -3576,6 +3576,14 @@ union bpf_attr {
>>>   * 		the data in *dst*. This is a wrapper of copy_from_user().
>>>   * 	Return
>>>   * 		0 on success, or a negative error in case of failure.
>>> + *
>>> + * u32 bpf_get_xdp_hash(struct xdp_buff *xdp_md)
>>
>> I thought there was a change recently making the uapi reference xdp_md;
>> xdp_buff is not exported as part of the uapi.
> 
> Not sure what you mean - other xdp related helper functions still use xdp_buff as an argument. Could you point me to an example of what you are referring to?

a patch from Jakub that is apparently not committed yet.


>>
>>> +	memset(&keys, 0, sizeof(keys));
>>> +	__skb_flow_dissect(dev_net(xdp->rxq->dev), NULL,
>> &flow_keys_dissector,
>>> +			   &keys, data, eth->h_proto, sizeof(*eth), len,
>>> +			   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
>>
>> By STOP_AT_FLOW_LABEL I take it you want this to be an L3 hash. Why not
>> add a flags argument to the helper and let the hash be L3 or L4?
> 
> I wrote this exactly how skb_get_hash calls skb_flow_dissect - with the same flag STOP_AT_FLOW_LABEL.  So it should already cover L3 and L4 hash, right? From what I understand STOP_AT_FLOW_LABEL flag is used to only stop parsing when a flow label is seen in ipv6 packets. 

right; missed that. That means this new helper will always do an L4 hash
for tcp/udp. Adding a flags argument now for the uapi allows a later
change to make it an L3 hash.
Tom Herbert Sept. 2, 2020, 3:56 p.m. UTC | #6
On Mon, Aug 31, 2020 at 2:33 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 8/31/20 9:25 PM, Harshitha Ramamurthy wrote:
> > This patch adds a helper function called bpf_get_xdp_hash to calculate
> > the hash for a packet at the XDP layer. In the helper function, we call
> > the kernel flow dissector in non-skb mode by passing the net pointer
> > to calculate the hash.
>
> So this commit msg says 'what' the patch does, but says nothing about 'why' it is
> needed especially given there's the 1 mio insn limit in place where it should be
> easy to write that up in BPF anyway. The commit msg needs to have a clear rationale
> which describes the motivation behind this helper.. why it cannot be done in BPF
> itself?
>
Daniel,

We already have a fully functional, well tested, and very complete
parser supporting many protocols and packet hash functions in the
kernel in skb_flow_dissect and friends. I suppose we could replicate
all that code in eBPF but I don't think it's fair to say it's easy to
get to the same level in eBPF. eBPF does solve the problem of
extensibility of kernel code, however IMO if someone is looking for an
easy way to get a packet hash then a simple helper function makes
sense and results in a much simpler XDP program.

There's some nice potential follow on work also. If the hardware
provides a quality L4 hash that could be passed into the XDP program
to avoid having to call the helper. If we do invoke the helper it
would make sense to return the hash to the driver so that it can set
in the skbuff to avoid having the stack call skb_flow_dissect again.
We can also have an flow_diessct helper that invokes skb_flow_dissect
and returns the meta data based on input keys. This would be useful
for filtering in XDP etc.

Tom

> > Changes since RFC:
> > - accounted for vlans(David Ahern)
> > - return the correct hash by not using skb_get_hash(David Ahern)
> > - call __skb_flow_dissect in non-skb mode
> >
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a613750d5515..bffe93b526e7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3576,6 +3576,14 @@  union bpf_attr {
  * 		the data in *dst*. This is a wrapper of copy_from_user().
  * 	Return
  * 		0 on success, or a negative error in case of failure.
+ *
+ * u32 bpf_get_xdp_hash(struct xdp_buff *xdp_md)
+ *	Description
+ *		Return the hash for the xdp context passed. This function
+ *		calls skb_flow_dissect in non-skb mode to calculate the
+ *		hash for the packet.
+ *	Return
+ *		The 32-bit hash.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3727,6 +3735,7 @@  union bpf_attr {
 	FN(inode_storage_delete),	\
 	FN(d_path),			\
 	FN(copy_from_user),		\
+	FN(get_xdp_hash),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/net/core/filter.c b/net/core/filter.c
index 47eef9a0be6a..cfb5a6aea6c3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3765,6 +3765,33 @@  static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
 	.arg3_type      = ARG_ANYTHING,
 };
 
+BPF_CALL_1(bpf_get_xdp_hash, struct xdp_buff *, xdp)
+{
+	void *data_end = xdp->data_end;
+	struct ethhdr *eth = xdp->data;
+	void *data = xdp->data;
+	struct flow_keys keys;
+	u32 ret = 0;
+	int len;
+
+	len = data_end - data;
+	if (len <= 0)
+		return ret;
+	memset(&keys, 0, sizeof(keys));
+	__skb_flow_dissect(dev_net(xdp->rxq->dev), NULL, &flow_keys_dissector,
+			   &keys, data, eth->h_proto, sizeof(*eth), len,
+			   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+	ret = flow_hash_from_keys(&keys);
+	return ret;
+}
+
+static const struct bpf_func_proto bpf_get_xdp_hash_proto = {
+	.func		= bpf_get_xdp_hash,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+};
+
 static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
 				  unsigned long off, unsigned long len)
 {
@@ -6837,6 +6864,8 @@  xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_xdp_adjust_tail_proto;
 	case BPF_FUNC_fib_lookup:
 		return &bpf_xdp_fib_lookup_proto;
+	case BPF_FUNC_get_xdp_hash:
+		return &bpf_get_xdp_hash_proto;
 #ifdef CONFIG_INET
 	case BPF_FUNC_sk_lookup_udp:
 		return &bpf_xdp_sk_lookup_udp_proto;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index a613750d5515..bffe93b526e7 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3576,6 +3576,14 @@  union bpf_attr {
  * 		the data in *dst*. This is a wrapper of copy_from_user().
  * 	Return
  * 		0 on success, or a negative error in case of failure.
+ *
+ * u32 bpf_get_xdp_hash(struct xdp_buff *xdp_md)
+ *	Description
+ *		Return the hash for the xdp context passed. This function
+ *		calls skb_flow_dissect in non-skb mode to calculate the
+ *		hash for the packet.
+ *	Return
+ *		The 32-bit hash.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3727,6 +3735,7 @@  union bpf_attr {
 	FN(inode_storage_delete),	\
 	FN(d_path),			\
 	FN(copy_from_user),		\
+	FN(get_xdp_hash),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper