diff mbox series

[RFC,net-next,v4,1/4] flow_dissector: Add PPPoE dissectors

Message ID 20220708122421.19309-2-marcin.szycik@linux.intel.com
State RFC
Headers show
Series ice: PPPoE offload support | expand

Commit Message

Marcin Szycik July 8, 2022, 12:24 p.m. UTC
From: Wojciech Drewek <wojciech.drewek@intel.com>

Allow to dissect PPPoE specific fields which are:
- session ID (16 bits)
- ppp protocol (16 bits)
- type (16 bits) - this is PPPoE ethertype, for now only
  ETH_P_PPP_SES is supported, possible ETH_P_PPP_DISC
  in the future

The goal is to make the following TC command possible:

  # tc filter add dev ens6f0 ingress prio 1 protocol ppp_ses \
      flower \
        pppoe_sid 12 \
        ppp_proto ip \
      action drop

Note that only PPPoE Session is supported.

Signed-off-by: Wojciech Drewek <wojciech.drewek@intel.com>
---
v4:
  * pppoe header validation
  * added MPLS dissection
  * added support for compressed ppp protocol field
  * flow_dissector_key_pppoe::session_id stored in __be16
  * new field: flow_dissector_key_pppoe::type
v3: revert byte order changes in is_ppp_proto_supported from
    previous version
v2: ntohs instead of htons in is_ppp_proto_supported

 include/net/flow_dissector.h | 13 ++++++
 net/core/flow_dissector.c    | 84 +++++++++++++++++++++++++++++++++---
 2 files changed, 90 insertions(+), 7 deletions(-)

Comments

Guillaume Nault July 8, 2022, 7:05 p.m. UTC | #1
On Fri, Jul 08, 2022 at 02:24:18PM +0200, Marcin Szycik wrote:
> From: Wojciech Drewek <wojciech.drewek@intel.com>
> 
> Allow to dissect PPPoE specific fields which are:
> - session ID (16 bits)
> - ppp protocol (16 bits)
> - type (16 bits) - this is PPPoE ethertype, for now only
>   ETH_P_PPP_SES is supported, possible ETH_P_PPP_DISC
>   in the future
> 
> The goal is to make the following TC command possible:
> 
>   # tc filter add dev ens6f0 ingress prio 1 protocol ppp_ses \
>       flower \
>         pppoe_sid 12 \
>         ppp_proto ip \
>       action drop
> 
> Note that only PPPoE Session is supported.
> 
> Signed-off-by: Wojciech Drewek <wojciech.drewek@intel.com>
> ---
> v4:
>   * pppoe header validation
>   * added MPLS dissection
>   * added support for compressed ppp protocol field
>   * flow_dissector_key_pppoe::session_id stored in __be16
>   * new field: flow_dissector_key_pppoe::type
> v3: revert byte order changes in is_ppp_proto_supported from
>     previous version
> v2: ntohs instead of htons in is_ppp_proto_supported
> 
>  include/net/flow_dissector.h | 13 ++++++
>  net/core/flow_dissector.c    | 84 +++++++++++++++++++++++++++++++++---
>  2 files changed, 90 insertions(+), 7 deletions(-)
> 
> diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
> index a4c6057c7097..af0d429b9a26 100644
> --- a/include/net/flow_dissector.h
> +++ b/include/net/flow_dissector.h
> @@ -261,6 +261,18 @@ struct flow_dissector_key_num_of_vlans {
>  	u8 num_of_vlans;
>  };
>  
> +/**
> + * struct flow_dissector_key_pppoe:
> + * @session_id: pppoe session id
> + * @ppp_proto: ppp protocol
> + * @type: pppoe eth type
> + */
> +struct flow_dissector_key_pppoe {
> +	__be16 session_id;
> +	__be16 ppp_proto;
> +	__be16 type;

I don't understand the need for the new 'type' field.

> +};
> +
>  enum flow_dissector_key_id {
>  	FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */
>  	FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */
> @@ -291,6 +303,7 @@ enum flow_dissector_key_id {
>  	FLOW_DISSECTOR_KEY_CT, /* struct flow_dissector_key_ct */
>  	FLOW_DISSECTOR_KEY_HASH, /* struct flow_dissector_key_hash */
>  	FLOW_DISSECTOR_KEY_NUM_OF_VLANS, /* struct flow_dissector_key_num_of_vlans */
> +	FLOW_DISSECTOR_KEY_PPPOE, /* struct flow_dissector_key_pppoe */
>  
>  	FLOW_DISSECTOR_KEY_MAX,
>  };
> diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
> index 6aee04f75e3e..3a90219d2354 100644
> --- a/net/core/flow_dissector.c
> +++ b/net/core/flow_dissector.c
> @@ -895,6 +895,42 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
>  	return result == BPF_OK;
>  }
>  
> +/**
> + * is_ppp_proto_supported - checks if inner PPP protocol should be dissected
> + * @proto: protocol type (PPP proto field)
> + */
> +static bool is_ppp_proto_supported(__be16 proto)
> +{
> +	switch (proto) {
> +	case htons(PPP_AT):
> +	case htons(PPP_IPX):
> +	case htons(PPP_VJC_COMP):
> +	case htons(PPP_VJC_UNCOMP):
> +	case htons(PPP_MP):
> +	case htons(PPP_COMPFRAG):
> +	case htons(PPP_COMP):
> +	case htons(PPP_IPCP):
> +	case htons(PPP_ATCP):
> +	case htons(PPP_IPXCP):
> +	case htons(PPP_IPV6CP):
> +	case htons(PPP_CCPFRAG):
> +	case htons(PPP_MPLSCP):
> +	case htons(PPP_LCP):
> +	case htons(PPP_PAP):
> +	case htons(PPP_LQR):
> +	case htons(PPP_CHAP):
> +	case htons(PPP_CBCP):
> +		return true;
> +	default:
> +		return false;
> +	}
> +}
> +
> +static bool is_pppoe_ses_hdr_valid(struct pppoe_hdr hdr)
> +{
> +	return hdr.ver == 1 && hdr.type == 1 && hdr.code == 0;
> +}
> +
>  /**
>   * __skb_flow_dissect - extract the flow_keys struct and return it
>   * @net: associated network namespace, derived from @skb if NULL
> @@ -1214,26 +1250,60 @@ bool __skb_flow_dissect(const struct net *net,
>  			struct pppoe_hdr hdr;
>  			__be16 proto;
>  		} *hdr, _hdr;
> +		__be16 ppp_proto;
> +
>  		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
>  		if (!hdr) {
>  			fdret = FLOW_DISSECT_RET_OUT_BAD;
>  			break;
>  		}
>  
> -		nhoff += PPPOE_SES_HLEN;
> -		switch (hdr->proto) {
> -		case htons(PPP_IP):
> +		if (!is_pppoe_ses_hdr_valid(hdr->hdr)) {
> +			fdret = FLOW_DISSECT_RET_OUT_BAD;
> +			break;
> +		}
> +
> +		/* least significant bit of the first byte
> +		 * indicates if protocol field was compressed
> +		 */
> +		if (hdr->proto & 1) {
> +			ppp_proto = hdr->proto << 8;

This is little endian specific code. We can't make such assumptions.

> +			nhoff += PPPOE_SES_HLEN - 1;
> +		} else {
> +			ppp_proto = hdr->proto;
> +			nhoff += PPPOE_SES_HLEN;
> +		}
> +
> +		if (ppp_proto == htons(PPP_IP)) {
>  			proto = htons(ETH_P_IP);
>  			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
> -			break;
> -		case htons(PPP_IPV6):
> +		} else if (ppp_proto == htons(PPP_IPV6)) {
>  			proto = htons(ETH_P_IPV6);
>  			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
> -			break;
> -		default:
> +		} else if (ppp_proto == htons(PPP_MPLS_UC)) {
> +			proto = htons(ETH_P_MPLS_UC);
> +			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
> +		} else if (ppp_proto == htons(PPP_MPLS_MC)) {
> +			proto = htons(ETH_P_MPLS_MC);
> +			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
> +		} else if (is_ppp_proto_supported(ppp_proto)) {
> +			fdret = FLOW_DISSECT_RET_OUT_GOOD;
> +		} else {
>  			fdret = FLOW_DISSECT_RET_OUT_BAD;
>  			break;
>  		}
> +
> +		if (dissector_uses_key(flow_dissector,
> +				       FLOW_DISSECTOR_KEY_PPPOE)) {
> +			struct flow_dissector_key_pppoe *key_pppoe;
> +
> +			key_pppoe = skb_flow_dissector_target(flow_dissector,
> +							      FLOW_DISSECTOR_KEY_PPPOE,
> +							      target_container);
> +			key_pppoe->session_id = hdr->hdr.sid;
> +			key_pppoe->ppp_proto = ppp_proto;
> +			key_pppoe->type = htons(ETH_P_PPP_SES);
> +		}
>  		break;
>  	}
>  	case htons(ETH_P_TIPC): {
> -- 
> 2.35.1
>
Wojciech Drewek July 11, 2022, 10:23 a.m. UTC | #2
> -----Original Message-----
> From: Guillaume Nault <gnault@redhat.com>
> Sent: piątek, 8 lipca 2022 21:05
> To: Marcin Szycik <marcin.szycik@linux.intel.com>
> Cc: netdev@vger.kernel.org; Nguyen, Anthony L <anthony.l.nguyen@intel.com>; davem@davemloft.net;
> xiyou.wangcong@gmail.com; Brandeburg, Jesse <jesse.brandeburg@intel.com>; gustavoars@kernel.org;
> baowen.zheng@corigine.com; boris.sukholitko@broadcom.com; edumazet@google.com; kuba@kernel.org; jhs@mojatatu.com;
> jiri@resnulli.us; kurt@linutronix.de; pablo@netfilter.org; pabeni@redhat.com; paulb@nvidia.com; simon.horman@corigine.com;
> komachi.yoshiki@gmail.com; zhangkaiheb@126.com; intel-wired-lan@lists.osuosl.org; michal.swiatkowski@linux.intel.com; Drewek,
> Wojciech <wojciech.drewek@intel.com>; Lobakin, Alexandr <alexandr.lobakin@intel.com>; mostrows@earthlink.net;
> paulus@samba.org
> Subject: Re: [RFC PATCH net-next v4 1/4] flow_dissector: Add PPPoE dissectors
> 
> On Fri, Jul 08, 2022 at 02:24:18PM +0200, Marcin Szycik wrote:
> > From: Wojciech Drewek <wojciech.drewek@intel.com>
> >
> > Allow to dissect PPPoE specific fields which are:
> > - session ID (16 bits)
> > - ppp protocol (16 bits)
> > - type (16 bits) - this is PPPoE ethertype, for now only
> >   ETH_P_PPP_SES is supported, possible ETH_P_PPP_DISC
> >   in the future
> >
> > The goal is to make the following TC command possible:
> >
> >   # tc filter add dev ens6f0 ingress prio 1 protocol ppp_ses \
> >       flower \
> >         pppoe_sid 12 \
> >         ppp_proto ip \
> >       action drop
> >
> > Note that only PPPoE Session is supported.
> >
> > Signed-off-by: Wojciech Drewek <wojciech.drewek@intel.com>
> > ---
> > v4:
> >   * pppoe header validation
> >   * added MPLS dissection
> >   * added support for compressed ppp protocol field
> >   * flow_dissector_key_pppoe::session_id stored in __be16
> >   * new field: flow_dissector_key_pppoe::type
> > v3: revert byte order changes in is_ppp_proto_supported from
> >     previous version
> > v2: ntohs instead of htons in is_ppp_proto_supported
> >
> >  include/net/flow_dissector.h | 13 ++++++
> >  net/core/flow_dissector.c    | 84 +++++++++++++++++++++++++++++++++---
> >  2 files changed, 90 insertions(+), 7 deletions(-)
> >
> > diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
> > index a4c6057c7097..af0d429b9a26 100644
> > --- a/include/net/flow_dissector.h
> > +++ b/include/net/flow_dissector.h
> > @@ -261,6 +261,18 @@ struct flow_dissector_key_num_of_vlans {
> >  	u8 num_of_vlans;
> >  };
> >
> > +/**
> > + * struct flow_dissector_key_pppoe:
> > + * @session_id: pppoe session id
> > + * @ppp_proto: ppp protocol
> > + * @type: pppoe eth type
> > + */
> > +struct flow_dissector_key_pppoe {
> > +	__be16 session_id;
> > +	__be16 ppp_proto;
> > +	__be16 type;
> 
> I don't understand the need for the new 'type' field.

Let's say user want to add below filter with just protocol field:
tc filter add dev ens6f0 ingress prio 1 protocol ppp_ses action drop

cls_flower would set basic.n_proto to ETH_P_PPP_SES, then PPPoE packet
arrives with ppp_proto = PPP_IP, which means that in  __skb_flow_dissect basic.n_proto is going to
be set to ETH_P_IP. We have a mismatch here cls_flower set basic.n_proto to ETH_P_PPP_SES and
flow_dissector set it to ETH_P_IP. That's why in such example basic.n_proto has to be set to 0 (it works the same 
with vlans) and key_pppoe::type has to be used. In other words basic.n_proto can't be used for storing
ETH_P_PPP_SES because it will store encapsulated protocol.

We could also use it to match on ETH_P_PPP_DISC.

> 
> > +};
> > +
> >  enum flow_dissector_key_id {
> >  	FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */
> >  	FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */
> > @@ -291,6 +303,7 @@ enum flow_dissector_key_id {
> >  	FLOW_DISSECTOR_KEY_CT, /* struct flow_dissector_key_ct */
> >  	FLOW_DISSECTOR_KEY_HASH, /* struct flow_dissector_key_hash */
> >  	FLOW_DISSECTOR_KEY_NUM_OF_VLANS, /* struct flow_dissector_key_num_of_vlans */
> > +	FLOW_DISSECTOR_KEY_PPPOE, /* struct flow_dissector_key_pppoe */
> >
> >  	FLOW_DISSECTOR_KEY_MAX,
> >  };
> > diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
> > index 6aee04f75e3e..3a90219d2354 100644
> > --- a/net/core/flow_dissector.c
> > +++ b/net/core/flow_dissector.c
> > @@ -895,6 +895,42 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
> >  	return result == BPF_OK;
> >  }
> >
> > +/**
> > + * is_ppp_proto_supported - checks if inner PPP protocol should be dissected
> > + * @proto: protocol type (PPP proto field)
> > + */
> > +static bool is_ppp_proto_supported(__be16 proto)
> > +{
> > +	switch (proto) {
> > +	case htons(PPP_AT):
> > +	case htons(PPP_IPX):
> > +	case htons(PPP_VJC_COMP):
> > +	case htons(PPP_VJC_UNCOMP):
> > +	case htons(PPP_MP):
> > +	case htons(PPP_COMPFRAG):
> > +	case htons(PPP_COMP):
> > +	case htons(PPP_IPCP):
> > +	case htons(PPP_ATCP):
> > +	case htons(PPP_IPXCP):
> > +	case htons(PPP_IPV6CP):
> > +	case htons(PPP_CCPFRAG):
> > +	case htons(PPP_MPLSCP):
> > +	case htons(PPP_LCP):
> > +	case htons(PPP_PAP):
> > +	case htons(PPP_LQR):
> > +	case htons(PPP_CHAP):
> > +	case htons(PPP_CBCP):
> > +		return true;
> > +	default:
> > +		return false;
> > +	}
> > +}
> > +
> > +static bool is_pppoe_ses_hdr_valid(struct pppoe_hdr hdr)
> > +{
> > +	return hdr.ver == 1 && hdr.type == 1 && hdr.code == 0;
> > +}
> > +
> >  /**
> >   * __skb_flow_dissect - extract the flow_keys struct and return it
> >   * @net: associated network namespace, derived from @skb if NULL
> > @@ -1214,26 +1250,60 @@ bool __skb_flow_dissect(const struct net *net,
> >  			struct pppoe_hdr hdr;
> >  			__be16 proto;
> >  		} *hdr, _hdr;
> > +		__be16 ppp_proto;
> > +
> >  		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
> >  		if (!hdr) {
> >  			fdret = FLOW_DISSECT_RET_OUT_BAD;
> >  			break;
> >  		}
> >
> > -		nhoff += PPPOE_SES_HLEN;
> > -		switch (hdr->proto) {
> > -		case htons(PPP_IP):
> > +		if (!is_pppoe_ses_hdr_valid(hdr->hdr)) {
> > +			fdret = FLOW_DISSECT_RET_OUT_BAD;
> > +			break;
> > +		}
> > +
> > +		/* least significant bit of the first byte
> > +		 * indicates if protocol field was compressed
> > +		 */
> > +		if (hdr->proto & 1) {
> > +			ppp_proto = hdr->proto << 8;
> 
> This is little endian specific code. We can't make such assumptions.

Both ppp_proto and hdr->prot are stored in __be16 so left shift by 8 bits
should always be ok, am I right?
Should I use cpu_to_be16 on both 1 and 8. Is that what you mean?

> 
> > +			nhoff += PPPOE_SES_HLEN - 1;
> > +		} else {
> > +			ppp_proto = hdr->proto;
> > +			nhoff += PPPOE_SES_HLEN;
> > +		}
> > +
> > +		if (ppp_proto == htons(PPP_IP)) {
> >  			proto = htons(ETH_P_IP);
> >  			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
> > -			break;
> > -		case htons(PPP_IPV6):
> > +		} else if (ppp_proto == htons(PPP_IPV6)) {
> >  			proto = htons(ETH_P_IPV6);
> >  			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
> > -			break;
> > -		default:
> > +		} else if (ppp_proto == htons(PPP_MPLS_UC)) {
> > +			proto = htons(ETH_P_MPLS_UC);
> > +			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
> > +		} else if (ppp_proto == htons(PPP_MPLS_MC)) {
> > +			proto = htons(ETH_P_MPLS_MC);
> > +			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
> > +		} else if (is_ppp_proto_supported(ppp_proto)) {
> > +			fdret = FLOW_DISSECT_RET_OUT_GOOD;
> > +		} else {
> >  			fdret = FLOW_DISSECT_RET_OUT_BAD;
> >  			break;
> >  		}
> > +
> > +		if (dissector_uses_key(flow_dissector,
> > +				       FLOW_DISSECTOR_KEY_PPPOE)) {
> > +			struct flow_dissector_key_pppoe *key_pppoe;
> > +
> > +			key_pppoe = skb_flow_dissector_target(flow_dissector,
> > +							      FLOW_DISSECTOR_KEY_PPPOE,
> > +							      target_container);
> > +			key_pppoe->session_id = hdr->hdr.sid;
> > +			key_pppoe->ppp_proto = ppp_proto;
> > +			key_pppoe->type = htons(ETH_P_PPP_SES);
> > +		}
> >  		break;
> >  	}
> >  	case htons(ETH_P_TIPC): {
> > --
> > 2.35.1
> >
Guillaume Nault July 12, 2022, 5:20 p.m. UTC | #3
On Mon, Jul 11, 2022 at 10:23:50AM +0000, Drewek, Wojciech wrote:
> > > diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
> > > index a4c6057c7097..af0d429b9a26 100644
> > > --- a/include/net/flow_dissector.h
> > > +++ b/include/net/flow_dissector.h
> > > @@ -261,6 +261,18 @@ struct flow_dissector_key_num_of_vlans {
> > >  	u8 num_of_vlans;
> > >  };
> > >
> > > +/**
> > > + * struct flow_dissector_key_pppoe:
> > > + * @session_id: pppoe session id
> > > + * @ppp_proto: ppp protocol
> > > + * @type: pppoe eth type
> > > + */
> > > +struct flow_dissector_key_pppoe {
> > > +	__be16 session_id;
> > > +	__be16 ppp_proto;
> > > +	__be16 type;
> > 
> > I don't understand the need for the new 'type' field.
> 
> Let's say user want to add below filter with just protocol field:
> tc filter add dev ens6f0 ingress prio 1 protocol ppp_ses action drop
> 
> cls_flower would set basic.n_proto to ETH_P_PPP_SES, then PPPoE packet
> arrives with ppp_proto = PPP_IP, which means that in  __skb_flow_dissect basic.n_proto is going to
> be set to ETH_P_IP. We have a mismatch here cls_flower set basic.n_proto to ETH_P_PPP_SES and
> flow_dissector set it to ETH_P_IP. That's why in such example basic.n_proto has to be set to 0 (it works the same 
> with vlans) and key_pppoe::type has to be used. In other words basic.n_proto can't be used for storing
> ETH_P_PPP_SES because it will store encapsulated protocol.
> 
> We could also use it to match on ETH_P_PPP_DISC.

Thanks for the explanation. That makes sense.

> > > @@ -1214,26 +1250,60 @@ bool __skb_flow_dissect(const struct net *net,
> > >  			struct pppoe_hdr hdr;
> > >  			__be16 proto;
> > >  		} *hdr, _hdr;
> > > +		__be16 ppp_proto;
> > > +
> > >  		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
> > >  		if (!hdr) {
> > >  			fdret = FLOW_DISSECT_RET_OUT_BAD;
> > >  			break;
> > >  		}
> > >
> > > -		nhoff += PPPOE_SES_HLEN;
> > > -		switch (hdr->proto) {
> > > -		case htons(PPP_IP):
> > > +		if (!is_pppoe_ses_hdr_valid(hdr->hdr)) {
> > > +			fdret = FLOW_DISSECT_RET_OUT_BAD;
> > > +			break;
> > > +		}
> > > +
> > > +		/* least significant bit of the first byte
> > > +		 * indicates if protocol field was compressed
> > > +		 */
> > > +		if (hdr->proto & 1) {
> > > +			ppp_proto = hdr->proto << 8;
> > 
> > This is little endian specific code. We can't make such assumptions.
> 
> Both ppp_proto and hdr->prot are stored in __be16 so left shift by 8 bits
> should always be ok, am I right?

Sorry, I don't understand. How could the test and the bit shift
operation give the correct result on a big endian machine?

Let's say we handle an IPv4 packet and the PPP protocol field isn't
compressed. That is, protocol is 0x0021.
On a big endian machine 'hdr->proto & 1' is true and the bit shift sets
ppp_proto to 0x2100, while the code should have left the original value
untouched.

> Should I use cpu_to_be16 on both 1 and 8. Is that what you mean?

I can't see how cpu_to_be16() could help here. I was thinking of simply
using ntohs(hdr->proto).
Wojciech Drewek July 13, 2022, 7:58 a.m. UTC | #4
> -----Original Message-----
> From: Guillaume Nault <gnault@redhat.com>
> Sent: wtorek, 12 lipca 2022 19:20
> To: Drewek, Wojciech <wojciech.drewek@intel.com>
> Cc: Marcin Szycik <marcin.szycik@linux.intel.com>; netdev@vger.kernel.org; Nguyen, Anthony L <anthony.l.nguyen@intel.com>;
> davem@davemloft.net; xiyou.wangcong@gmail.com; Brandeburg, Jesse <jesse.brandeburg@intel.com>; gustavoars@kernel.org;
> baowen.zheng@corigine.com; boris.sukholitko@broadcom.com; edumazet@google.com; kuba@kernel.org; jhs@mojatatu.com;
> jiri@resnulli.us; kurt@linutronix.de; pablo@netfilter.org; pabeni@redhat.com; paulb@nvidia.com; simon.horman@corigine.com;
> komachi.yoshiki@gmail.com; zhangkaiheb@126.com; intel-wired-lan@lists.osuosl.org; michal.swiatkowski@linux.intel.com; Lobakin,
> Alexandr <alexandr.lobakin@intel.com>; mostrows@earthlink.net; paulus@samba.org
> Subject: Re: [RFC PATCH net-next v4 1/4] flow_dissector: Add PPPoE dissectors
> 
> On Mon, Jul 11, 2022 at 10:23:50AM +0000, Drewek, Wojciech wrote:
> > > > diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
> > > > index a4c6057c7097..af0d429b9a26 100644
> > > > --- a/include/net/flow_dissector.h
> > > > +++ b/include/net/flow_dissector.h
> > > > @@ -261,6 +261,18 @@ struct flow_dissector_key_num_of_vlans {
> > > >  	u8 num_of_vlans;
> > > >  };
> > > >
> > > > +/**
> > > > + * struct flow_dissector_key_pppoe:
> > > > + * @session_id: pppoe session id
> > > > + * @ppp_proto: ppp protocol
> > > > + * @type: pppoe eth type
> > > > + */
> > > > +struct flow_dissector_key_pppoe {
> > > > +	__be16 session_id;
> > > > +	__be16 ppp_proto;
> > > > +	__be16 type;
> > >
> > > I don't understand the need for the new 'type' field.
> >
> > Let's say user want to add below filter with just protocol field:
> > tc filter add dev ens6f0 ingress prio 1 protocol ppp_ses action drop
> >
> > cls_flower would set basic.n_proto to ETH_P_PPP_SES, then PPPoE packet
> > arrives with ppp_proto = PPP_IP, which means that in  __skb_flow_dissect basic.n_proto is going to
> > be set to ETH_P_IP. We have a mismatch here cls_flower set basic.n_proto to ETH_P_PPP_SES and
> > flow_dissector set it to ETH_P_IP. That's why in such example basic.n_proto has to be set to 0 (it works the same
> > with vlans) and key_pppoe::type has to be used. In other words basic.n_proto can't be used for storing
> > ETH_P_PPP_SES because it will store encapsulated protocol.
> >
> > We could also use it to match on ETH_P_PPP_DISC.
> 
> Thanks for the explanation. That makes sense.
> 
> > > > @@ -1214,26 +1250,60 @@ bool __skb_flow_dissect(const struct net *net,
> > > >  			struct pppoe_hdr hdr;
> > > >  			__be16 proto;
> > > >  		} *hdr, _hdr;
> > > > +		__be16 ppp_proto;
> > > > +
> > > >  		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
> > > >  		if (!hdr) {
> > > >  			fdret = FLOW_DISSECT_RET_OUT_BAD;
> > > >  			break;
> > > >  		}
> > > >
> > > > -		nhoff += PPPOE_SES_HLEN;
> > > > -		switch (hdr->proto) {
> > > > -		case htons(PPP_IP):
> > > > +		if (!is_pppoe_ses_hdr_valid(hdr->hdr)) {
> > > > +			fdret = FLOW_DISSECT_RET_OUT_BAD;
> > > > +			break;
> > > > +		}
> > > > +
> > > > +		/* least significant bit of the first byte
> > > > +		 * indicates if protocol field was compressed
> > > > +		 */
> > > > +		if (hdr->proto & 1) {
> > > > +			ppp_proto = hdr->proto << 8;
> > >
> > > This is little endian specific code. We can't make such assumptions.
> >
> > Both ppp_proto and hdr->prot are stored in __be16 so left shift by 8 bits
> > should always be ok, am I right?
> 
> Sorry, I don't understand. How could the test and the bit shift
> operation give the correct result on a big endian machine?
> 
> Let's say we handle an IPv4 packet and the PPP protocol field isn't
> compressed. That is, protocol is 0x0021.
> On a big endian machine 'hdr->proto & 1' is true and the bit shift sets
> ppp_proto to 0x2100, while the code should have left the original value
> untouched.
> 

Ok, I see now, we'll fix it in the next version.

> > Should I use cpu_to_be16 on both 1 and 8. Is that what you mean?
> 
> I can't see how cpu_to_be16() could help here. I was thinking of simply
> using ntohs(hdr->proto).
Wojciech Drewek July 13, 2022, 1:54 p.m. UTC | #5
> -----Original Message-----
> From: Drewek, Wojciech
> Sent: środa, 13 lipca 2022 09:59
> To: Guillaume Nault <gnault@redhat.com>
> Cc: Marcin Szycik <marcin.szycik@linux.intel.com>; netdev@vger.kernel.org; Nguyen, Anthony L <anthony.l.nguyen@intel.com>;
> davem@davemloft.net; xiyou.wangcong@gmail.com; Brandeburg, Jesse <jesse.brandeburg@intel.com>; gustavoars@kernel.org;
> baowen.zheng@corigine.com; boris.sukholitko@broadcom.com; edumazet@google.com; kuba@kernel.org; jhs@mojatatu.com;
> jiri@resnulli.us; kurt@linutronix.de; pablo@netfilter.org; pabeni@redhat.com; paulb@nvidia.com; simon.horman@corigine.com;
> komachi.yoshiki@gmail.com; zhangkaiheb@126.com; intel-wired-lan@lists.osuosl.org; michal.swiatkowski@linux.intel.com; Lobakin,
> Alexandr <alexandr.lobakin@intel.com>; mostrows@earthlink.net; paulus@samba.org
> Subject: RE: [RFC PATCH net-next v4 1/4] flow_dissector: Add PPPoE dissectors
> 
> 
> 
> > -----Original Message-----
> > From: Guillaume Nault <gnault@redhat.com>
> > Sent: wtorek, 12 lipca 2022 19:20
> > To: Drewek, Wojciech <wojciech.drewek@intel.com>
> > Cc: Marcin Szycik <marcin.szycik@linux.intel.com>; netdev@vger.kernel.org; Nguyen, Anthony L <anthony.l.nguyen@intel.com>;
> > davem@davemloft.net; xiyou.wangcong@gmail.com; Brandeburg, Jesse <jesse.brandeburg@intel.com>; gustavoars@kernel.org;
> > baowen.zheng@corigine.com; boris.sukholitko@broadcom.com; edumazet@google.com; kuba@kernel.org; jhs@mojatatu.com;
> > jiri@resnulli.us; kurt@linutronix.de; pablo@netfilter.org; pabeni@redhat.com; paulb@nvidia.com; simon.horman@corigine.com;
> > komachi.yoshiki@gmail.com; zhangkaiheb@126.com; intel-wired-lan@lists.osuosl.org; michal.swiatkowski@linux.intel.com; Lobakin,
> > Alexandr <alexandr.lobakin@intel.com>; mostrows@earthlink.net; paulus@samba.org
> > Subject: Re: [RFC PATCH net-next v4 1/4] flow_dissector: Add PPPoE dissectors
> >
> > On Mon, Jul 11, 2022 at 10:23:50AM +0000, Drewek, Wojciech wrote:
> > > > > diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
> > > > > index a4c6057c7097..af0d429b9a26 100644
> > > > > --- a/include/net/flow_dissector.h
> > > > > +++ b/include/net/flow_dissector.h
> > > > > @@ -261,6 +261,18 @@ struct flow_dissector_key_num_of_vlans {
> > > > >  	u8 num_of_vlans;
> > > > >  };
> > > > >
> > > > > +/**
> > > > > + * struct flow_dissector_key_pppoe:
> > > > > + * @session_id: pppoe session id
> > > > > + * @ppp_proto: ppp protocol
> > > > > + * @type: pppoe eth type
> > > > > + */
> > > > > +struct flow_dissector_key_pppoe {
> > > > > +	__be16 session_id;
> > > > > +	__be16 ppp_proto;
> > > > > +	__be16 type;
> > > >
> > > > I don't understand the need for the new 'type' field.
> > >
> > > Let's say user want to add below filter with just protocol field:
> > > tc filter add dev ens6f0 ingress prio 1 protocol ppp_ses action drop
> > >
> > > cls_flower would set basic.n_proto to ETH_P_PPP_SES, then PPPoE packet
> > > arrives with ppp_proto = PPP_IP, which means that in  __skb_flow_dissect basic.n_proto is going to
> > > be set to ETH_P_IP. We have a mismatch here cls_flower set basic.n_proto to ETH_P_PPP_SES and
> > > flow_dissector set it to ETH_P_IP. That's why in such example basic.n_proto has to be set to 0 (it works the same
> > > with vlans) and key_pppoe::type has to be used. In other words basic.n_proto can't be used for storing
> > > ETH_P_PPP_SES because it will store encapsulated protocol.
> > >
> > > We could also use it to match on ETH_P_PPP_DISC.
> >
> > Thanks for the explanation. That makes sense.
> >
> > > > > @@ -1214,26 +1250,60 @@ bool __skb_flow_dissect(const struct net *net,
> > > > >  			struct pppoe_hdr hdr;
> > > > >  			__be16 proto;
> > > > >  		} *hdr, _hdr;
> > > > > +		__be16 ppp_proto;
> > > > > +
> > > > >  		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
> > > > >  		if (!hdr) {
> > > > >  			fdret = FLOW_DISSECT_RET_OUT_BAD;
> > > > >  			break;
> > > > >  		}
> > > > >
> > > > > -		nhoff += PPPOE_SES_HLEN;
> > > > > -		switch (hdr->proto) {
> > > > > -		case htons(PPP_IP):
> > > > > +		if (!is_pppoe_ses_hdr_valid(hdr->hdr)) {
> > > > > +			fdret = FLOW_DISSECT_RET_OUT_BAD;
> > > > > +			break;
> > > > > +		}
> > > > > +
> > > > > +		/* least significant bit of the first byte
> > > > > +		 * indicates if protocol field was compressed
> > > > > +		 */
> > > > > +		if (hdr->proto & 1) {
> > > > > +			ppp_proto = hdr->proto << 8;
> > > >
> > > > This is little endian specific code. We can't make such assumptions.
> > >
> > > Both ppp_proto and hdr->prot are stored in __be16 so left shift by 8 bits
> > > should always be ok, am I right?
> >
> > Sorry, I don't understand. How could the test and the bit shift
> > operation give the correct result on a big endian machine?
> >
> > Let's say we handle an IPv4 packet and the PPP protocol field isn't
> > compressed. That is, protocol is 0x0021.
> > On a big endian machine 'hdr->proto & 1' is true and the bit shift sets
> > ppp_proto to 0x2100, while the code should have left the original value
> > untouched.
> >
> 
> Ok, I see now, we'll fix it in the next version.

I think this should work with both LE and BE arch, what do you think Guillaume?
We don't want to spam so much with next versions so maybe it is better
to ask earlier.

	u16 ppp_proto;

	ppp_proto = ntohs(hdr->proto);
	if (ppp_proto & 256) {
		ppp_proto = htons(ppp_proto >> 8);
		nhoff += PPPOE_SES_HLEN - 1;
	} else {
		ppp_proto = htons(ppp_proto);
		nhoff += PPPOE_SES_HLEN;
	}

> 
> > > Should I use cpu_to_be16 on both 1 and 8. Is that what you mean?
> >
> > I can't see how cpu_to_be16() could help here. I was thinking of simply
> > using ntohs(hdr->proto).
Guillaume Nault July 17, 2022, 11:15 a.m. UTC | #6
On Wed, Jul 13, 2022 at 01:54:35PM +0000, Drewek, Wojciech wrote:
> I think this should work with both LE and BE arch, what do you think Guillaume?
> We don't want to spam so much with next versions so maybe it is better
> to ask earlier.
> 
> 	u16 ppp_proto;
> 
> 	ppp_proto = ntohs(hdr->proto);
> 	if (ppp_proto & 256) {
> 		ppp_proto = htons(ppp_proto >> 8);
> 		nhoff += PPPOE_SES_HLEN - 1;
> 	} else {
> 		ppp_proto = htons(ppp_proto);
> 		nhoff += PPPOE_SES_HLEN;
> 	}

Sorry for responding late. I was away this week (and will be next week
too) and have very sporadic (and slow) Internet connection and limitted
time for review. I saw you've sent a v5 with this code, I'll reply
there. Thanks for being so patient.
diff mbox series

Patch

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index a4c6057c7097..af0d429b9a26 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -261,6 +261,18 @@  struct flow_dissector_key_num_of_vlans {
 	u8 num_of_vlans;
 };
 
+/**
+ * struct flow_dissector_key_pppoe:
+ * @session_id: pppoe session id
+ * @ppp_proto: ppp protocol
+ * @type: pppoe eth type
+ */
+struct flow_dissector_key_pppoe {
+	__be16 session_id;
+	__be16 ppp_proto;
+	__be16 type;
+};
+
 enum flow_dissector_key_id {
 	FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */
 	FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */
@@ -291,6 +303,7 @@  enum flow_dissector_key_id {
 	FLOW_DISSECTOR_KEY_CT, /* struct flow_dissector_key_ct */
 	FLOW_DISSECTOR_KEY_HASH, /* struct flow_dissector_key_hash */
 	FLOW_DISSECTOR_KEY_NUM_OF_VLANS, /* struct flow_dissector_key_num_of_vlans */
+	FLOW_DISSECTOR_KEY_PPPOE, /* struct flow_dissector_key_pppoe */
 
 	FLOW_DISSECTOR_KEY_MAX,
 };
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 6aee04f75e3e..3a90219d2354 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -895,6 +895,42 @@  bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
 	return result == BPF_OK;
 }
 
+/**
+ * is_ppp_proto_supported - checks if inner PPP protocol should be dissected
+ * @proto: protocol type (PPP proto field)
+ */
+static bool is_ppp_proto_supported(__be16 proto)
+{
+	switch (proto) {
+	case htons(PPP_AT):
+	case htons(PPP_IPX):
+	case htons(PPP_VJC_COMP):
+	case htons(PPP_VJC_UNCOMP):
+	case htons(PPP_MP):
+	case htons(PPP_COMPFRAG):
+	case htons(PPP_COMP):
+	case htons(PPP_IPCP):
+	case htons(PPP_ATCP):
+	case htons(PPP_IPXCP):
+	case htons(PPP_IPV6CP):
+	case htons(PPP_CCPFRAG):
+	case htons(PPP_MPLSCP):
+	case htons(PPP_LCP):
+	case htons(PPP_PAP):
+	case htons(PPP_LQR):
+	case htons(PPP_CHAP):
+	case htons(PPP_CBCP):
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool is_pppoe_ses_hdr_valid(struct pppoe_hdr hdr)
+{
+	return hdr.ver == 1 && hdr.type == 1 && hdr.code == 0;
+}
+
 /**
  * __skb_flow_dissect - extract the flow_keys struct and return it
  * @net: associated network namespace, derived from @skb if NULL
@@ -1214,26 +1250,60 @@  bool __skb_flow_dissect(const struct net *net,
 			struct pppoe_hdr hdr;
 			__be16 proto;
 		} *hdr, _hdr;
+		__be16 ppp_proto;
+
 		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
 		if (!hdr) {
 			fdret = FLOW_DISSECT_RET_OUT_BAD;
 			break;
 		}
 
-		nhoff += PPPOE_SES_HLEN;
-		switch (hdr->proto) {
-		case htons(PPP_IP):
+		if (!is_pppoe_ses_hdr_valid(hdr->hdr)) {
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
+		}
+
+		/* least significant bit of the first byte
+		 * indicates if protocol field was compressed
+		 */
+		if (hdr->proto & 1) {
+			ppp_proto = hdr->proto << 8;
+			nhoff += PPPOE_SES_HLEN - 1;
+		} else {
+			ppp_proto = hdr->proto;
+			nhoff += PPPOE_SES_HLEN;
+		}
+
+		if (ppp_proto == htons(PPP_IP)) {
 			proto = htons(ETH_P_IP);
 			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
-			break;
-		case htons(PPP_IPV6):
+		} else if (ppp_proto == htons(PPP_IPV6)) {
 			proto = htons(ETH_P_IPV6);
 			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
-			break;
-		default:
+		} else if (ppp_proto == htons(PPP_MPLS_UC)) {
+			proto = htons(ETH_P_MPLS_UC);
+			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+		} else if (ppp_proto == htons(PPP_MPLS_MC)) {
+			proto = htons(ETH_P_MPLS_MC);
+			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+		} else if (is_ppp_proto_supported(ppp_proto)) {
+			fdret = FLOW_DISSECT_RET_OUT_GOOD;
+		} else {
 			fdret = FLOW_DISSECT_RET_OUT_BAD;
 			break;
 		}
+
+		if (dissector_uses_key(flow_dissector,
+				       FLOW_DISSECTOR_KEY_PPPOE)) {
+			struct flow_dissector_key_pppoe *key_pppoe;
+
+			key_pppoe = skb_flow_dissector_target(flow_dissector,
+							      FLOW_DISSECTOR_KEY_PPPOE,
+							      target_container);
+			key_pppoe->session_id = hdr->hdr.sid;
+			key_pppoe->ppp_proto = ppp_proto;
+			key_pppoe->type = htons(ETH_P_PPP_SES);
+		}
 		break;
 	}
 	case htons(ETH_P_TIPC): {