diff mbox series

[ovs-dev,net-next,6/8] net:openvswitch: add psample support

Message ID 20240424135109.3524355-7-amorenoz@redhat.com
State Handled Elsewhere
Headers show
Series net: openvswitch: Add sample multicasting. | expand

Commit Message

Adrián Moreno April 24, 2024, 1:50 p.m. UTC
Add support for psample sampling via two new attributes to the
OVS_ACTION_ATTR_SAMPLE action.

OVS_SAMPLE_ATTR_PSAMPLE_GROUP used to pass an integer psample group_id.
OVS_SAMPLE_ATTR_PSAMPLE_COOKIE used to pass a variable-length binary
cookie that will be forwared to psample.

The maximum length of the user-defined cookie is set to 16, same as
tc_cookie, to discourage using cookies that will not be offloadable.

In order to simplify the internal processing of the action and given the
maximum size of the cookie is relatively small, add both fields to the
internal-only struct sample_arg.

The presence of a group_id mandates that the action shall called the
psample module to multicast the packet with such group_id and the
user-provided cookie if present. This behavior is orthonogal to
also executing the nested actions if present.

Signed-off-by: Adrian Moreno <amorenoz@redhat.com>
---
 Documentation/netlink/specs/ovs_flow.yaml |  6 ++
 include/uapi/linux/openvswitch.h          | 49 ++++++++++----
 net/openvswitch/actions.c                 | 51 +++++++++++++--
 net/openvswitch/flow_netlink.c            | 80 ++++++++++++++++++-----
 4 files changed, 153 insertions(+), 33 deletions(-)

Comments

Dan Carpenter April 30, 2024, 7:29 a.m. UTC | #1
Hi Adrian,

kernel test robot noticed the following build warnings:

url:    https://github.com/intel-lab-lkp/linux/commits/Adrian-Moreno/net-netlink-export-genl-private-pointer-getters/20240424-215821
base:   net-next/main
patch link:    https://lore.kernel.org/r/20240424135109.3524355-7-amorenoz%40redhat.com
patch subject: [PATCH net-next 6/8] net:openvswitch: add psample support
config: i386-randconfig-141-20240430 (https://download.01.org/0day-ci/archive/20240430/202404300611.kJOZL2KI-lkp@intel.com/config)
compiler: gcc-13 (Ubuntu 13.2.0-4ubuntu3) 13.2.0

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
| Closes: https://lore.kernel.org/r/202404300611.kJOZL2KI-lkp@intel.com/

New smatch warnings:
net/openvswitch/actions.c:1097 sample() error: uninitialized symbol 'ret'.

vim +/ret +1097 net/openvswitch/actions.c

ccb1352e76cff0 Jesse Gross        2011-10-25  1061  static int sample(struct datapath *dp, struct sk_buff *skb,
ccea74457bbdaf Neil McKee         2015-05-26  1062  		  struct sw_flow_key *key, const struct nlattr *attr,
798c166173ffb5 andy zhou          2017-03-20  1063  		  bool last)
ccb1352e76cff0 Jesse Gross        2011-10-25  1064  {
ccc0b9e4657efd Adrian Moreno      2024-04-24  1065  	const struct sample_arg *arg;
798c166173ffb5 andy zhou          2017-03-20  1066  	struct nlattr *sample_arg;
798c166173ffb5 andy zhou          2017-03-20  1067  	int rem = nla_len(attr);
ccc0b9e4657efd Adrian Moreno      2024-04-24  1068  	struct nlattr *actions;
bef7f7567a104a andy zhou          2017-03-20  1069  	bool clone_flow_key;
ccc0b9e4657efd Adrian Moreno      2024-04-24  1070  	int ret;
ccb1352e76cff0 Jesse Gross        2011-10-25  1071  
798c166173ffb5 andy zhou          2017-03-20  1072  	/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
798c166173ffb5 andy zhou          2017-03-20  1073  	sample_arg = nla_data(attr);
798c166173ffb5 andy zhou          2017-03-20  1074  	arg = nla_data(sample_arg);
798c166173ffb5 andy zhou          2017-03-20  1075  	actions = nla_next(sample_arg, &rem);
e05176a3283822 Wenyu Zhang        2015-08-05  1076  
798c166173ffb5 andy zhou          2017-03-20  1077  	if ((arg->probability != U32_MAX) &&
a251c17aa558d8 Jason A. Donenfeld 2022-10-05  1078  	    (!arg->probability || get_random_u32() > arg->probability)) {
798c166173ffb5 andy zhou          2017-03-20  1079  		if (last)
9d802da40b7c82 Adrian Moreno      2023-08-11  1080  			ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
ccb1352e76cff0 Jesse Gross        2011-10-25  1081  		return 0;
ccb1352e76cff0 Jesse Gross        2011-10-25  1082  	}
651887b0c22cff Simon Horman       2014-07-21  1083  
ccc0b9e4657efd Adrian Moreno      2024-04-24  1084  	if (arg->flags & OVS_SAMPLE_ARG_FLAG_PSAMPLE) {
ccc0b9e4657efd Adrian Moreno      2024-04-24  1085  		ret = ovs_psample_packet(dp, key, arg, skb);
ccc0b9e4657efd Adrian Moreno      2024-04-24  1086  		if (ret)
ccc0b9e4657efd Adrian Moreno      2024-04-24  1087  			return ret;
ccc0b9e4657efd Adrian Moreno      2024-04-24  1088  	}
ccc0b9e4657efd Adrian Moreno      2024-04-24  1089  
ccc0b9e4657efd Adrian Moreno      2024-04-24  1090  	if (nla_ok(actions, rem)) {
ccc0b9e4657efd Adrian Moreno      2024-04-24  1091  		clone_flow_key = !(arg->flags & OVS_SAMPLE_ARG_FLAG_EXEC);
ccc0b9e4657efd Adrian Moreno      2024-04-24  1092  		ret = clone_execute(dp, skb, key, 0, actions, rem, last,
bef7f7567a104a andy zhou          2017-03-20  1093  				    clone_flow_key);
ccc0b9e4657efd Adrian Moreno      2024-04-24  1094  	} else if (last) {
ccc0b9e4657efd Adrian Moreno      2024-04-24  1095  		ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);

ret is uninitialized on this last path.

ccc0b9e4657efd Adrian Moreno      2024-04-24  1096  	}
ccc0b9e4657efd Adrian Moreno      2024-04-24 @1097  	return ret;
971427f353f3c4 Andy Zhou          2014-09-15  1098  }
Eelco Chaudron May 3, 2024, 9:43 a.m. UTC | #2
On 24 Apr 2024, at 15:50, Adrian Moreno wrote:

> Add support for psample sampling via two new attributes to the
> OVS_ACTION_ATTR_SAMPLE action.
>
> OVS_SAMPLE_ATTR_PSAMPLE_GROUP used to pass an integer psample group_id.
> OVS_SAMPLE_ATTR_PSAMPLE_COOKIE used to pass a variable-length binary
> cookie that will be forwared to psample.
>
> The maximum length of the user-defined cookie is set to 16, same as
> tc_cookie, to discourage using cookies that will not be offloadable.
>
> In order to simplify the internal processing of the action and given the
> maximum size of the cookie is relatively small, add both fields to the
> internal-only struct sample_arg.
>
> The presence of a group_id mandates that the action shall called the
> psample module to multicast the packet with such group_id and the
> user-provided cookie if present. This behavior is orthonogal to
> also executing the nested actions if present.
>
> Signed-off-by: Adrian Moreno <amorenoz@redhat.com>

This is not a full review yet. Just some comments, as I’m looking at the user-space patch first and added similar comments.

I’ll do a proper review of this series once I’m done with user-space part.

//Eelco

> ---
>  Documentation/netlink/specs/ovs_flow.yaml |  6 ++
>  include/uapi/linux/openvswitch.h          | 49 ++++++++++----
>  net/openvswitch/actions.c                 | 51 +++++++++++++--
>  net/openvswitch/flow_netlink.c            | 80 ++++++++++++++++++-----
>  4 files changed, 153 insertions(+), 33 deletions(-)
>
> diff --git a/Documentation/netlink/specs/ovs_flow.yaml b/Documentation/netlink/specs/ovs_flow.yaml
> index 4fdfc6b5cae9..5543c2937225 100644
> --- a/Documentation/netlink/specs/ovs_flow.yaml
> +++ b/Documentation/netlink/specs/ovs_flow.yaml
> @@ -825,6 +825,12 @@ attribute-sets:
>          name: actions
>          type: nest
>          nested-attributes: action-attrs
> +      -
> +        name: psample_group
> +        type: u32
> +      -
> +        name: psample_cookie
> +        type: binary
>    -
>      name: userspace-attrs
>      enum-name: ovs-userspace-attr
> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
> index efc82c318fa2..e9cd6f3a952d 100644
> --- a/include/uapi/linux/openvswitch.h
> +++ b/include/uapi/linux/openvswitch.h
> @@ -639,6 +639,7 @@ enum ovs_flow_attr {
>  #define OVS_UFID_F_OMIT_MASK     (1 << 1)
>  #define OVS_UFID_F_OMIT_ACTIONS  (1 << 2)
>
> +#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16
>  /**
>   * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
>   * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
> @@ -646,15 +647,27 @@ enum ovs_flow_attr {
>   * %UINT32_MAX samples all packets and intermediate values sample intermediate
>   * fractions of packets.
>   * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
> - * Actions are passed as nested attributes.
> + * Actions are passed as nested attributes. Optional if
> + * OVS_SAMPLE_ATTR_PSAMPLE_GROUP is set.
> + * @OVS_SAMPLE_ATTR_PSAMPLE_GROUP: A 32-bit number to be used as psample group.
> + * Optional if OVS_SAMPLE_ATTR_ACTIONS is set.
> + * @OVS_SAMPLE_ATTR_PSAMPLE_COOKIE: A variable-length binary cookie that, if
> + * provided, will be copied to the psample cookie.

As there is a limit of to the cookie should we mention it here?

>   *
> - * Executes the specified actions with the given probability on a per-packet
> - * basis.
> + * Either OVS_SAMPLE_ATTR_PSAMPLE_GROUP or OVS_SAMPLE_ATTR_ACTIONS must be
> + * specified.
> + *
> + * Executes the specified actions and/or sends the packet to psample
> + * with the given probability on a per-packet basis.
>   */
>  enum ovs_sample_attr {
>  	OVS_SAMPLE_ATTR_UNSPEC,
> -	OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
> -	OVS_SAMPLE_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
> +	OVS_SAMPLE_ATTR_PROBABILITY,	/* u32 number */
> +	OVS_SAMPLE_ATTR_ACTIONS,	/* Nested OVS_ACTION_ATTR_

Missing * after OVS_ACTION_ATTR_

> +					 * attributes.
> +					 */
> +	OVS_SAMPLE_ATTR_PSAMPLE_GROUP,	/* u32 number */
> +	OVS_SAMPLE_ATTR_PSAMPLE_COOKIE,	/* binary */

As these are general sample options, I would not add the PSAMPLE reference. Other data paths could use a different implementation. So I guess OVS_SAMPLE_ATTR_GROUP_ID and OVS_SAMPLE_ATTR_COOKIE would be enough.

>  	__OVS_SAMPLE_ATTR_MAX,
>
>  #ifdef __KERNEL__
> @@ -665,13 +678,27 @@ enum ovs_sample_attr {
>  #define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1)
>
>  #ifdef __KERNEL__
> +
> +/* Definition for flags in struct sample_arg. */
> +enum {
> +	/* When set, actions in sample will not change the flows. */
> +	OVS_SAMPLE_ARG_FLAG_EXEC = 1 << 0,
> +	/* When set, the packet will be sent to psample. */
> +	OVS_SAMPLE_ARG_FLAG_PSAMPLE = 1 << 1,
> +};
> +
>  struct sample_arg {
> -	bool exec;                   /* When true, actions in sample will not
> -				      * change flow keys. False otherwise.
> -				      */
> -	u32  probability;            /* Same value as
> -				      * 'OVS_SAMPLE_ATTR_PROBABILITY'.
> -				      */


Not sure if you can actually do this, you are changing a structure that is part of the UAPI. This change breaks backwards compatibility.


> +	u16 flags;		/* Flags that modify the behavior of the
> +				 * action. See SAMPLE_ARG_FLAG_*.
> +				 */
> +	u32  probability;       /* Same value as
> +				 * 'OVS_SAMPLE_ATTR_PROBABILITY'.
> +				 */
> +	u32  group_id;		/* Same value as
> +				 * 'OVS_SAMPLE_ATTR_PSAMPLE_GROUP'.
> +				 */
> +	u8  cookie_len;		/* Length of psample cookie. */
> +	char cookie[OVS_PSAMPLE_COOKIE_MAX_SIZE]; /* psample cookie data. */

Would it make sense for the cookie also to be u8?

>  };
>  #endif
>
> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
> index 6fcd7e2ca81f..eb3166986fd2 100644
> --- a/net/openvswitch/actions.c
> +++ b/net/openvswitch/actions.c
> @@ -24,6 +24,7 @@
>  #include <net/checksum.h>
>  #include <net/dsfield.h>
>  #include <net/mpls.h>
> +#include <net/psample.h>
>  #include <net/sctp/checksum.h>
>
>  #include "datapath.h"
> @@ -1025,6 +1026,34 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
>  	return 0;
>  }
>
> +static int ovs_psample_packet(struct datapath *dp, struct sw_flow_key *key,
> +			      const struct sample_arg *arg,
> +			      struct sk_buff *skb)
> +{
> +	struct psample_group psample_group = {};
> +	struct psample_metadata md = {};
> +	struct vport *input_vport;
> +	u32 rate;
> +
> +	psample_group.group_num = arg->group_id;
> +	psample_group.net = ovs_dp_get_net(dp);
> +
> +	input_vport = ovs_vport_rcu(dp, key->phy.in_port);
> +	if (!input_vport)
> +		input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
> +
> +	md.in_ifindex = input_vport->dev->ifindex;
> +	md.user_cookie = arg->cookie_len ? &arg->cookie[0] : NULL;
> +	md.user_cookie_len = arg->cookie_len;
> +	md.trunc_size = skb->len;
> +
> +	rate = arg->probability ? U32_MAX / arg->probability : 0;
> +
> +	psample_sample_packet(&psample_group, skb, rate, &md);

Does this mean now the ovs module, now is dependent on the presence of psample? I think we should only support sampling to psample if the module exists, else we should return an error. There might be distributions not including psample by default.

> +
> +	return 0;
> +}
> +
>  /* When 'last' is true, sample() should always consume the 'skb'.
>   * Otherwise, sample() should keep 'skb' intact regardless what
>   * actions are executed within sample().
> @@ -1033,11 +1062,12 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>  		  struct sw_flow_key *key, const struct nlattr *attr,
>  		  bool last)
>  {
> -	struct nlattr *actions;
> +	const struct sample_arg *arg;
>  	struct nlattr *sample_arg;
>  	int rem = nla_len(attr);
> -	const struct sample_arg *arg;
> +	struct nlattr *actions;
>  	bool clone_flow_key;
> +	int ret;
>
>  	/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
>  	sample_arg = nla_data(attr);
> @@ -1051,9 +1081,20 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>  		return 0;
>  	}
>
> -	clone_flow_key = !arg->exec;
> -	return clone_execute(dp, skb, key, 0, actions, rem, last,
> -			     clone_flow_key);
> +	if (arg->flags & OVS_SAMPLE_ARG_FLAG_PSAMPLE) {
> +		ret = ovs_psample_packet(dp, key, arg, skb);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	if (nla_ok(actions, rem)) {
> +		clone_flow_key = !(arg->flags & OVS_SAMPLE_ARG_FLAG_EXEC);
> +		ret = clone_execute(dp, skb, key, 0, actions, rem, last,
> +				    clone_flow_key);
> +	} else if (last) {
> +		ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
> +	}
> +	return ret;
>  }
>
>  /* When 'last' is true, clone() should always consume the 'skb'.
> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> index f224d9bcea5e..1a348d3905fc 100644
> --- a/net/openvswitch/flow_netlink.c
> +++ b/net/openvswitch/flow_netlink.c
> @@ -2561,6 +2561,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>  				  u32 mpls_label_count, bool log,
>  				  u32 depth);
>
> +static int copy_action(const struct nlattr *from,
> +		       struct sw_flow_actions **sfa, bool log);
> +
>  static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>  				    const struct sw_flow_key *key,
>  				    struct sw_flow_actions **sfa,
> @@ -2569,10 +2572,10 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>  				    u32 depth)
>  {
>  	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
> -	const struct nlattr *probability, *actions;
> +	const struct nlattr *probability, *actions, *group, *cookie;
> +	struct sample_arg arg = {};
>  	const struct nlattr *a;
>  	int rem, start, err;
> -	struct sample_arg arg;
>
>  	memset(attrs, 0, sizeof(attrs));
>  	nla_for_each_nested(a, attr, rem) {
> @@ -2589,7 +2592,19 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>  		return -EINVAL;
>
>  	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
> -	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
> +	if (actions && (!nla_len(actions) || nla_len(actions) < NLA_HDRLEN))
> +		return -EINVAL;
> +
> +	group = attrs[OVS_SAMPLE_ATTR_PSAMPLE_GROUP];
> +	if (group && nla_len(group) != sizeof(u32))
> +		return -EINVAL;
> +
> +	cookie = attrs[OVS_SAMPLE_ATTR_PSAMPLE_COOKIE];
> +	if (cookie &&
> +	    (!group || nla_len(cookie) > OVS_PSAMPLE_COOKIE_MAX_SIZE))
> +		return -EINVAL;
> +
> +	if (!group && !actions)
>  		return -EINVAL;
>
>  	/* validation done, copy sample action. */
> @@ -2608,7 +2623,19 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>  	 * If the sample is the last action, it can always be excuted
>  	 * rather than deferred.
>  	 */
> -	arg.exec = last || !actions_may_change_flow(actions);
> +	if (actions && (last || !actions_may_change_flow(actions)))
> +		arg.flags |= OVS_SAMPLE_ARG_FLAG_EXEC;
> +
> +	if (group) {
> +		arg.flags |= OVS_SAMPLE_ARG_FLAG_PSAMPLE;
> +		arg.group_id = nla_get_u32(group);
> +	}
> +
> +	if (cookie) {
> +		memcpy(&arg.cookie[0], nla_data(cookie), nla_len(cookie));
> +		arg.cookie_len = nla_len(cookie);
> +	}
> +
>  	arg.probability = nla_get_u32(probability);
>
>  	err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
> @@ -2616,12 +2643,13 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>  	if (err)
>  		return err;
>
> -	err = __ovs_nla_copy_actions(net, actions, key, sfa,
> -				     eth_type, vlan_tci, mpls_label_count, log,
> -				     depth + 1);
> -
> -	if (err)
> -		return err;
> +	if (actions) {
> +		err = __ovs_nla_copy_actions(net, actions, key, sfa,
> +					     eth_type, vlan_tci,
> +					     mpls_label_count, log, depth + 1);
> +		if (err)
> +			return err;
> +	}
>
>  	add_nested_action_end(*sfa, start);
>
> @@ -3553,20 +3581,38 @@ static int sample_action_to_attr(const struct nlattr *attr,
>  		goto out;
>  	}
>
> -	ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
> -	if (!ac_start) {
> -		err = -EMSGSIZE;
> -		goto out;
> +	if (arg->flags & OVS_SAMPLE_ARG_FLAG_PSAMPLE) {
> +		if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PSAMPLE_GROUP,
> +				arg->group_id)) {
> +			err = -EMSGSIZE;
> +			goto out;
> +		}
> +
> +		if (arg->cookie_len &&
> +		    nla_put(skb, OVS_SAMPLE_ATTR_PSAMPLE_COOKIE,
> +			    arg->cookie_len, &arg->cookie[0])) {
> +			err = -EMSGSIZE;
> +			goto out;
> +		}
>  	}
>
> -	err = ovs_nla_put_actions(actions, rem, skb);
> +	if (nla_ok(actions, rem)) {
> +		ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
> +		if (!ac_start) {
> +			err = -EMSGSIZE;
> +			goto out;
> +		}
> +		err = ovs_nla_put_actions(actions, rem, skb);
> +	}
>
>  out:
>  	if (err) {
> -		nla_nest_cancel(skb, ac_start);
> +		if (ac_start)
> +			nla_nest_cancel(skb, ac_start);
>  		nla_nest_cancel(skb, start);
>  	} else {
> -		nla_nest_end(skb, ac_start);
> +		if (ac_start)
> +			nla_nest_end(skb, ac_start);
>  		nla_nest_end(skb, start);
>  	}
>
> -- 
> 2.44.0
Adrián Moreno May 7, 2024, 2:18 p.m. UTC | #3
On 5/3/24 11:43 AM, Eelco Chaudron wrote:
> 
> 
> On 24 Apr 2024, at 15:50, Adrian Moreno wrote:
> 
>> Add support for psample sampling via two new attributes to the
>> OVS_ACTION_ATTR_SAMPLE action.
>>
>> OVS_SAMPLE_ATTR_PSAMPLE_GROUP used to pass an integer psample group_id.
>> OVS_SAMPLE_ATTR_PSAMPLE_COOKIE used to pass a variable-length binary
>> cookie that will be forwared to psample.
>>
>> The maximum length of the user-defined cookie is set to 16, same as
>> tc_cookie, to discourage using cookies that will not be offloadable.
>>
>> In order to simplify the internal processing of the action and given the
>> maximum size of the cookie is relatively small, add both fields to the
>> internal-only struct sample_arg.
>>
>> The presence of a group_id mandates that the action shall called the
>> psample module to multicast the packet with such group_id and the
>> user-provided cookie if present. This behavior is orthonogal to
>> also executing the nested actions if present.
>>
>> Signed-off-by: Adrian Moreno <amorenoz@redhat.com>
> 
> This is not a full review yet. Just some comments, as I’m looking at the user-space patch first and added similar comments.
> 
> I’ll do a proper review of this series once I’m done with user-space part.
> 
> //Eelco
> 
>> ---
>>   Documentation/netlink/specs/ovs_flow.yaml |  6 ++
>>   include/uapi/linux/openvswitch.h          | 49 ++++++++++----
>>   net/openvswitch/actions.c                 | 51 +++++++++++++--
>>   net/openvswitch/flow_netlink.c            | 80 ++++++++++++++++++-----
>>   4 files changed, 153 insertions(+), 33 deletions(-)
>>
>> diff --git a/Documentation/netlink/specs/ovs_flow.yaml b/Documentation/netlink/specs/ovs_flow.yaml
>> index 4fdfc6b5cae9..5543c2937225 100644
>> --- a/Documentation/netlink/specs/ovs_flow.yaml
>> +++ b/Documentation/netlink/specs/ovs_flow.yaml
>> @@ -825,6 +825,12 @@ attribute-sets:
>>           name: actions
>>           type: nest
>>           nested-attributes: action-attrs
>> +      -
>> +        name: psample_group
>> +        type: u32
>> +      -
>> +        name: psample_cookie
>> +        type: binary
>>     -
>>       name: userspace-attrs
>>       enum-name: ovs-userspace-attr
>> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
>> index efc82c318fa2..e9cd6f3a952d 100644
>> --- a/include/uapi/linux/openvswitch.h
>> +++ b/include/uapi/linux/openvswitch.h
>> @@ -639,6 +639,7 @@ enum ovs_flow_attr {
>>   #define OVS_UFID_F_OMIT_MASK     (1 << 1)
>>   #define OVS_UFID_F_OMIT_ACTIONS  (1 << 2)
>>
>> +#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16
>>   /**
>>    * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
>>    * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
>> @@ -646,15 +647,27 @@ enum ovs_flow_attr {
>>    * %UINT32_MAX samples all packets and intermediate values sample intermediate
>>    * fractions of packets.
>>    * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
>> - * Actions are passed as nested attributes.
>> + * Actions are passed as nested attributes. Optional if
>> + * OVS_SAMPLE_ATTR_PSAMPLE_GROUP is set.
>> + * @OVS_SAMPLE_ATTR_PSAMPLE_GROUP: A 32-bit number to be used as psample group.
>> + * Optional if OVS_SAMPLE_ATTR_ACTIONS is set.
>> + * @OVS_SAMPLE_ATTR_PSAMPLE_COOKIE: A variable-length binary cookie that, if
>> + * provided, will be copied to the psample cookie.
> 
> As there is a limit of to the cookie should we mention it here?
> 

I thought OVS_PSAMPLE_COOKIE_MAX_SIZE was expressive enough but sure, we can 
also mention it here.

>>    *
>> - * Executes the specified actions with the given probability on a per-packet
>> - * basis.
>> + * Either OVS_SAMPLE_ATTR_PSAMPLE_GROUP or OVS_SAMPLE_ATTR_ACTIONS must be
>> + * specified.
>> + *
>> + * Executes the specified actions and/or sends the packet to psample
>> + * with the given probability on a per-packet basis.
>>    */
>>   enum ovs_sample_attr {
>>   	OVS_SAMPLE_ATTR_UNSPEC,
>> -	OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
>> -	OVS_SAMPLE_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
>> +	OVS_SAMPLE_ATTR_PROBABILITY,	/* u32 number */
>> +	OVS_SAMPLE_ATTR_ACTIONS,	/* Nested OVS_ACTION_ATTR_
> 
> Missing * after OVS_ACTION_ATTR_

As a matter of fact, adding an * makes checkpatch generate a warning IIRC. 
That's why I initially removed it. I can look at fixing checkpatch instead.

> 
>> +					 * attributes.
>> +					 */
>> +	OVS_SAMPLE_ATTR_PSAMPLE_GROUP,	/* u32 number */
>> +	OVS_SAMPLE_ATTR_PSAMPLE_COOKIE,	/* binary */
> 
> As these are general sample options, I would not add the PSAMPLE reference. Other data paths could use a different implementation. So I guess OVS_SAMPLE_ATTR_GROUP_ID and OVS_SAMPLE_ATTR_COOKIE would be enough.
> 

OK. But isn't the API already psample-ish? I mean that the group_id is something 
specific to psample that might not be present in other datapath implementation.


>>   	__OVS_SAMPLE_ATTR_MAX,
>>
>>   #ifdef __KERNEL__
>> @@ -665,13 +678,27 @@ enum ovs_sample_attr {
>>   #define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1)
>>
>>   #ifdef __KERNEL__
>> +
>> +/* Definition for flags in struct sample_arg. */
>> +enum {
>> +	/* When set, actions in sample will not change the flows. */
>> +	OVS_SAMPLE_ARG_FLAG_EXEC = 1 << 0,
>> +	/* When set, the packet will be sent to psample. */
>> +	OVS_SAMPLE_ARG_FLAG_PSAMPLE = 1 << 1,
>> +};
>> +
>>   struct sample_arg {
>> -	bool exec;                   /* When true, actions in sample will not
>> -				      * change flow keys. False otherwise.
>> -				      */
>> -	u32  probability;            /* Same value as
>> -				      * 'OVS_SAMPLE_ATTR_PROBABILITY'.
>> -				      */
> 
> 
> Not sure if you can actually do this, you are changing a structure that is part of the UAPI. This change breaks backwards compatibility.
> 

Hmmm... this the internal argument structure protected by #ifdef __KERNEL__. It 
is used in several actions to optimize the internal action handling (i.e: using 
a compact struct instead of a list of netlink attributes). I guess the reason 
for having it in this file is because the attribute enum is being reused, but I 
wouldn't think of this struct as part of the uAPI.

If the (#ifdef __KERNEL__) does not exclude this struct from the uAPI I think we 
should move it (all of them actually) to some internal file.


> 
>> +	u16 flags;		/* Flags that modify the behavior of the
>> +				 * action. See SAMPLE_ARG_FLAG_*.
>> +				 */
>> +	u32  probability;       /* Same value as
>> +				 * 'OVS_SAMPLE_ATTR_PROBABILITY'.
>> +				 */
>> +	u32  group_id;		/* Same value as
>> +				 * 'OVS_SAMPLE_ATTR_PSAMPLE_GROUP'.
>> +				 */
>> +	u8  cookie_len;		/* Length of psample cookie. */
>> +	char cookie[OVS_PSAMPLE_COOKIE_MAX_SIZE]; /* psample cookie data. */
> 
> Would it make sense for the cookie also to be u8?
> 

Yes, probably.

>>   };
>>   #endif
>>
>> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
>> index 6fcd7e2ca81f..eb3166986fd2 100644
>> --- a/net/openvswitch/actions.c
>> +++ b/net/openvswitch/actions.c
>> @@ -24,6 +24,7 @@
>>   #include <net/checksum.h>
>>   #include <net/dsfield.h>
>>   #include <net/mpls.h>
>> +#include <net/psample.h>
>>   #include <net/sctp/checksum.h>
>>
>>   #include "datapath.h"
>> @@ -1025,6 +1026,34 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
>>   	return 0;
>>   }
>>
>> +static int ovs_psample_packet(struct datapath *dp, struct sw_flow_key *key,
>> +			      const struct sample_arg *arg,
>> +			      struct sk_buff *skb)
>> +{
>> +	struct psample_group psample_group = {};
>> +	struct psample_metadata md = {};
>> +	struct vport *input_vport;
>> +	u32 rate;
>> +
>> +	psample_group.group_num = arg->group_id;
>> +	psample_group.net = ovs_dp_get_net(dp);
>> +
>> +	input_vport = ovs_vport_rcu(dp, key->phy.in_port);
>> +	if (!input_vport)
>> +		input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
>> +
>> +	md.in_ifindex = input_vport->dev->ifindex;
>> +	md.user_cookie = arg->cookie_len ? &arg->cookie[0] : NULL;
>> +	md.user_cookie_len = arg->cookie_len;
>> +	md.trunc_size = skb->len;
>> +
>> +	rate = arg->probability ? U32_MAX / arg->probability : 0;
>> +
>> +	psample_sample_packet(&psample_group, skb, rate, &md);
> 
> Does this mean now the ovs module, now is dependent on the presence of psample? I think we should only support sampling to psample if the module exists, else we should return an error. There might be distributions not including psample by default.

Agree. I'll add some compile-time checks the same way we do with nf_nat.

> 
>> +
>> +	return 0;
>> +}
>> +
>>   /* When 'last' is true, sample() should always consume the 'skb'.
>>    * Otherwise, sample() should keep 'skb' intact regardless what
>>    * actions are executed within sample().
>> @@ -1033,11 +1062,12 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>>   		  struct sw_flow_key *key, const struct nlattr *attr,
>>   		  bool last)
>>   {
>> -	struct nlattr *actions;
>> +	const struct sample_arg *arg;
>>   	struct nlattr *sample_arg;
>>   	int rem = nla_len(attr);
>> -	const struct sample_arg *arg;
>> +	struct nlattr *actions;
>>   	bool clone_flow_key;
>> +	int ret;
>>
>>   	/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
>>   	sample_arg = nla_data(attr);
>> @@ -1051,9 +1081,20 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>>   		return 0;
>>   	}
>>
>> -	clone_flow_key = !arg->exec;
>> -	return clone_execute(dp, skb, key, 0, actions, rem, last,
>> -			     clone_flow_key);
>> +	if (arg->flags & OVS_SAMPLE_ARG_FLAG_PSAMPLE) {
>> +		ret = ovs_psample_packet(dp, key, arg, skb);
>> +		if (ret)
>> +			return ret;
>> +	}
>> +
>> +	if (nla_ok(actions, rem)) {
>> +		clone_flow_key = !(arg->flags & OVS_SAMPLE_ARG_FLAG_EXEC);
>> +		ret = clone_execute(dp, skb, key, 0, actions, rem, last,
>> +				    clone_flow_key);
>> +	} else if (last) {
>> +		ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
>> +	}
>> +	return ret;
>>   }
>>
>>   /* When 'last' is true, clone() should always consume the 'skb'.
>> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
>> index f224d9bcea5e..1a348d3905fc 100644
>> --- a/net/openvswitch/flow_netlink.c
>> +++ b/net/openvswitch/flow_netlink.c
>> @@ -2561,6 +2561,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>>   				  u32 mpls_label_count, bool log,
>>   				  u32 depth);
>>
>> +static int copy_action(const struct nlattr *from,
>> +		       struct sw_flow_actions **sfa, bool log);
>> +
>>   static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>   				    const struct sw_flow_key *key,
>>   				    struct sw_flow_actions **sfa,
>> @@ -2569,10 +2572,10 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>   				    u32 depth)
>>   {
>>   	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
>> -	const struct nlattr *probability, *actions;
>> +	const struct nlattr *probability, *actions, *group, *cookie;
>> +	struct sample_arg arg = {};
>>   	const struct nlattr *a;
>>   	int rem, start, err;
>> -	struct sample_arg arg;
>>
>>   	memset(attrs, 0, sizeof(attrs));
>>   	nla_for_each_nested(a, attr, rem) {
>> @@ -2589,7 +2592,19 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>   		return -EINVAL;
>>
>>   	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
>> -	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
>> +	if (actions && (!nla_len(actions) || nla_len(actions) < NLA_HDRLEN))
>> +		return -EINVAL;
>> +
>> +	group = attrs[OVS_SAMPLE_ATTR_PSAMPLE_GROUP];
>> +	if (group && nla_len(group) != sizeof(u32))
>> +		return -EINVAL;
>> +
>> +	cookie = attrs[OVS_SAMPLE_ATTR_PSAMPLE_COOKIE];
>> +	if (cookie &&
>> +	    (!group || nla_len(cookie) > OVS_PSAMPLE_COOKIE_MAX_SIZE))
>> +		return -EINVAL;
>> +
>> +	if (!group && !actions)
>>   		return -EINVAL;
>>
>>   	/* validation done, copy sample action. */
>> @@ -2608,7 +2623,19 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>   	 * If the sample is the last action, it can always be excuted
>>   	 * rather than deferred.
>>   	 */
>> -	arg.exec = last || !actions_may_change_flow(actions);
>> +	if (actions && (last || !actions_may_change_flow(actions)))
>> +		arg.flags |= OVS_SAMPLE_ARG_FLAG_EXEC;
>> +
>> +	if (group) {
>> +		arg.flags |= OVS_SAMPLE_ARG_FLAG_PSAMPLE;
>> +		arg.group_id = nla_get_u32(group);
>> +	}
>> +
>> +	if (cookie) {
>> +		memcpy(&arg.cookie[0], nla_data(cookie), nla_len(cookie));
>> +		arg.cookie_len = nla_len(cookie);
>> +	}
>> +
>>   	arg.probability = nla_get_u32(probability);
>>
>>   	err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
>> @@ -2616,12 +2643,13 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>   	if (err)
>>   		return err;
>>
>> -	err = __ovs_nla_copy_actions(net, actions, key, sfa,
>> -				     eth_type, vlan_tci, mpls_label_count, log,
>> -				     depth + 1);
>> -
>> -	if (err)
>> -		return err;
>> +	if (actions) {
>> +		err = __ovs_nla_copy_actions(net, actions, key, sfa,
>> +					     eth_type, vlan_tci,
>> +					     mpls_label_count, log, depth + 1);
>> +		if (err)
>> +			return err;
>> +	}
>>
>>   	add_nested_action_end(*sfa, start);
>>
>> @@ -3553,20 +3581,38 @@ static int sample_action_to_attr(const struct nlattr *attr,
>>   		goto out;
>>   	}
>>
>> -	ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
>> -	if (!ac_start) {
>> -		err = -EMSGSIZE;
>> -		goto out;
>> +	if (arg->flags & OVS_SAMPLE_ARG_FLAG_PSAMPLE) {
>> +		if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PSAMPLE_GROUP,
>> +				arg->group_id)) {
>> +			err = -EMSGSIZE;
>> +			goto out;
>> +		}
>> +
>> +		if (arg->cookie_len &&
>> +		    nla_put(skb, OVS_SAMPLE_ATTR_PSAMPLE_COOKIE,
>> +			    arg->cookie_len, &arg->cookie[0])) {
>> +			err = -EMSGSIZE;
>> +			goto out;
>> +		}
>>   	}
>>
>> -	err = ovs_nla_put_actions(actions, rem, skb);
>> +	if (nla_ok(actions, rem)) {
>> +		ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
>> +		if (!ac_start) {
>> +			err = -EMSGSIZE;
>> +			goto out;
>> +		}
>> +		err = ovs_nla_put_actions(actions, rem, skb);
>> +	}
>>
>>   out:
>>   	if (err) {
>> -		nla_nest_cancel(skb, ac_start);
>> +		if (ac_start)
>> +			nla_nest_cancel(skb, ac_start);
>>   		nla_nest_cancel(skb, start);
>>   	} else {
>> -		nla_nest_end(skb, ac_start);
>> +		if (ac_start)
>> +			nla_nest_end(skb, ac_start);
>>   		nla_nest_end(skb, start);
>>   	}
>>
>> -- 
>> 2.44.0
>
Eelco Chaudron May 8, 2024, 9:48 a.m. UTC | #4
On 7 May 2024, at 16:18, Adrian Moreno wrote:

> On 5/3/24 11:43 AM, Eelco Chaudron wrote:
>>
>>
>> On 24 Apr 2024, at 15:50, Adrian Moreno wrote:
>>
>>> Add support for psample sampling via two new attributes to the
>>> OVS_ACTION_ATTR_SAMPLE action.
>>>
>>> OVS_SAMPLE_ATTR_PSAMPLE_GROUP used to pass an integer psample group_id.
>>> OVS_SAMPLE_ATTR_PSAMPLE_COOKIE used to pass a variable-length binary
>>> cookie that will be forwared to psample.
>>>
>>> The maximum length of the user-defined cookie is set to 16, same as
>>> tc_cookie, to discourage using cookies that will not be offloadable.
>>>
>>> In order to simplify the internal processing of the action and given the
>>> maximum size of the cookie is relatively small, add both fields to the
>>> internal-only struct sample_arg.
>>>
>>> The presence of a group_id mandates that the action shall called the
>>> psample module to multicast the packet with such group_id and the
>>> user-provided cookie if present. This behavior is orthonogal to
>>> also executing the nested actions if present.
>>>
>>> Signed-off-by: Adrian Moreno <amorenoz@redhat.com>
>>
>> This is not a full review yet. Just some comments, as I’m looking at the user-space patch first and added similar comments.
>>
>> I’ll do a proper review of this series once I’m done with user-space part.
>>
>> //Eelco
>>
>>> ---
>>>   Documentation/netlink/specs/ovs_flow.yaml |  6 ++
>>>   include/uapi/linux/openvswitch.h          | 49 ++++++++++----
>>>   net/openvswitch/actions.c                 | 51 +++++++++++++--
>>>   net/openvswitch/flow_netlink.c            | 80 ++++++++++++++++++-----
>>>   4 files changed, 153 insertions(+), 33 deletions(-)
>>>
>>> diff --git a/Documentation/netlink/specs/ovs_flow.yaml b/Documentation/netlink/specs/ovs_flow.yaml
>>> index 4fdfc6b5cae9..5543c2937225 100644
>>> --- a/Documentation/netlink/specs/ovs_flow.yaml
>>> +++ b/Documentation/netlink/specs/ovs_flow.yaml
>>> @@ -825,6 +825,12 @@ attribute-sets:
>>>           name: actions
>>>           type: nest
>>>           nested-attributes: action-attrs
>>> +      -
>>> +        name: psample_group
>>> +        type: u32
>>> +      -
>>> +        name: psample_cookie
>>> +        type: binary
>>>     -
>>>       name: userspace-attrs
>>>       enum-name: ovs-userspace-attr
>>> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
>>> index efc82c318fa2..e9cd6f3a952d 100644
>>> --- a/include/uapi/linux/openvswitch.h
>>> +++ b/include/uapi/linux/openvswitch.h
>>> @@ -639,6 +639,7 @@ enum ovs_flow_attr {
>>>   #define OVS_UFID_F_OMIT_MASK     (1 << 1)
>>>   #define OVS_UFID_F_OMIT_ACTIONS  (1 << 2)
>>>
>>> +#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16
>>>   /**
>>>    * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
>>>    * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
>>> @@ -646,15 +647,27 @@ enum ovs_flow_attr {
>>>    * %UINT32_MAX samples all packets and intermediate values sample intermediate
>>>    * fractions of packets.
>>>    * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
>>> - * Actions are passed as nested attributes.
>>> + * Actions are passed as nested attributes. Optional if
>>> + * OVS_SAMPLE_ATTR_PSAMPLE_GROUP is set.
>>> + * @OVS_SAMPLE_ATTR_PSAMPLE_GROUP: A 32-bit number to be used as psample group.
>>> + * Optional if OVS_SAMPLE_ATTR_ACTIONS is set.
>>> + * @OVS_SAMPLE_ATTR_PSAMPLE_COOKIE: A variable-length binary cookie that, if
>>> + * provided, will be copied to the psample cookie.
>>
>> As there is a limit of to the cookie should we mention it here?
>>
>
> I thought OVS_PSAMPLE_COOKIE_MAX_SIZE was expressive enough but sure, we can also mention it here.
>
>>>    *
>>> - * Executes the specified actions with the given probability on a per-packet
>>> - * basis.
>>> + * Either OVS_SAMPLE_ATTR_PSAMPLE_GROUP or OVS_SAMPLE_ATTR_ACTIONS must be
>>> + * specified.
>>> + *
>>> + * Executes the specified actions and/or sends the packet to psample
>>> + * with the given probability on a per-packet basis.
>>>    */
>>>   enum ovs_sample_attr {
>>>   	OVS_SAMPLE_ATTR_UNSPEC,
>>> -	OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
>>> -	OVS_SAMPLE_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
>>> +	OVS_SAMPLE_ATTR_PROBABILITY,	/* u32 number */
>>> +	OVS_SAMPLE_ATTR_ACTIONS,	/* Nested OVS_ACTION_ATTR_
>>
>> Missing * after OVS_ACTION_ATTR_
>
> As a matter of fact, adding an * makes checkpatch generate a warning IIRC. That's why I initially removed it. I can look at fixing checkpatch instead.
>
>>
>>> +					 * attributes.
>>> +					 */
>>> +	OVS_SAMPLE_ATTR_PSAMPLE_GROUP,	/* u32 number */
>>> +	OVS_SAMPLE_ATTR_PSAMPLE_COOKIE,	/* binary */
>>
>> As these are general sample options, I would not add the PSAMPLE reference. Other data paths could use a different implementation. So I guess OVS_SAMPLE_ATTR_GROUP_ID and OVS_SAMPLE_ATTR_COOKIE would be enough.
>>
>
> OK. But isn't the API already psample-ish? I mean that the group_id is something specific to psample that might not be present in other datapath implementation.

Well, I see it as a general GROUP and COOKIE attribute that should be available as part of the SAMPLE being taken/provided by the Datapath implementation.

Not sure what we should call PSAMPLE, but some suggestions are;

  OVS_SAMPLE_ATTR_OFFLOAD/DESTINATION/ENDPOINT/COLLECTOR/TARGET_COOKIE

>>>   	__OVS_SAMPLE_ATTR_MAX,
>>>
>>>   #ifdef __KERNEL__
>>> @@ -665,13 +678,27 @@ enum ovs_sample_attr {
>>>   #define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1)
>>>
>>>   #ifdef __KERNEL__
>>> +
>>> +/* Definition for flags in struct sample_arg. */
>>> +enum {
>>> +	/* When set, actions in sample will not change the flows. */
>>> +	OVS_SAMPLE_ARG_FLAG_EXEC = 1 << 0,
>>> +	/* When set, the packet will be sent to psample. */
>>> +	OVS_SAMPLE_ARG_FLAG_PSAMPLE = 1 << 1,
>>> +};
>>> +
>>>   struct sample_arg {
>>> -	bool exec;                   /* When true, actions in sample will not
>>> -				      * change flow keys. False otherwise.
>>> -				      */
>>> -	u32  probability;            /* Same value as
>>> -				      * 'OVS_SAMPLE_ATTR_PROBABILITY'.
>>> -				      */
>>
>>
>> Not sure if you can actually do this, you are changing a structure that is part of the UAPI. This change breaks backwards compatibility.
>>
>
> Hmmm... this the internal argument structure protected by #ifdef __KERNEL__. It is used in several actions to optimize the internal action handling (i.e: using a compact struct instead of a list of netlink attributes). I guess the reason for having it in this file is because the attribute enum is being reused, but I wouldn't think of this struct as part of the uAPI.
>
> If the (#ifdef __KERNEL__) does not exclude this struct from the uAPI I think we should move it (all of them actually) to some internal file.

You are right, I missed the ‘#ifdef __KERNEL__’ kernel.

>>
>>> +	u16 flags;		/* Flags that modify the behavior of the
>>> +				 * action. See SAMPLE_ARG_FLAG_*.
>>> +				 */
>>> +	u32  probability;       /* Same value as
>>> +				 * 'OVS_SAMPLE_ATTR_PROBABILITY'.
>>> +				 */
>>> +	u32  group_id;		/* Same value as
>>> +				 * 'OVS_SAMPLE_ATTR_PSAMPLE_GROUP'.
>>> +				 */
>>> +	u8  cookie_len;		/* Length of psample cookie. */
>>> +	char cookie[OVS_PSAMPLE_COOKIE_MAX_SIZE]; /* psample cookie data. */
>>
>> Would it make sense for the cookie also to be u8?
>>
>
> Yes, probably.
>
>>>   };
>>>   #endif
>>>
>>> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
>>> index 6fcd7e2ca81f..eb3166986fd2 100644
>>> --- a/net/openvswitch/actions.c
>>> +++ b/net/openvswitch/actions.c
>>> @@ -24,6 +24,7 @@
>>>   #include <net/checksum.h>
>>>   #include <net/dsfield.h>
>>>   #include <net/mpls.h>
>>> +#include <net/psample.h>
>>>   #include <net/sctp/checksum.h>
>>>
>>>   #include "datapath.h"
>>> @@ -1025,6 +1026,34 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
>>>   	return 0;
>>>   }
>>>
>>> +static int ovs_psample_packet(struct datapath *dp, struct sw_flow_key *key,
>>> +			      const struct sample_arg *arg,
>>> +			      struct sk_buff *skb)
>>> +{
>>> +	struct psample_group psample_group = {};
>>> +	struct psample_metadata md = {};
>>> +	struct vport *input_vport;
>>> +	u32 rate;
>>> +
>>> +	psample_group.group_num = arg->group_id;
>>> +	psample_group.net = ovs_dp_get_net(dp);
>>> +
>>> +	input_vport = ovs_vport_rcu(dp, key->phy.in_port);
>>> +	if (!input_vport)
>>> +		input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
>>> +
>>> +	md.in_ifindex = input_vport->dev->ifindex;
>>> +	md.user_cookie = arg->cookie_len ? &arg->cookie[0] : NULL;
>>> +	md.user_cookie_len = arg->cookie_len;
>>> +	md.trunc_size = skb->len;
>>> +
>>> +	rate = arg->probability ? U32_MAX / arg->probability : 0;
>>> +
>>> +	psample_sample_packet(&psample_group, skb, rate, &md);
>>
>> Does this mean now the ovs module, now is dependent on the presence of psample? I think we should only support sampling to psample if the module exists, else we should return an error. There might be distributions not including psample by default.
>
> Agree. I'll add some compile-time checks the same way we do with nf_nat.
>
>>
>>> +
>>> +	return 0;
>>> +}
>>> +
>>>   /* When 'last' is true, sample() should always consume the 'skb'.
>>>    * Otherwise, sample() should keep 'skb' intact regardless what
>>>    * actions are executed within sample().
>>> @@ -1033,11 +1062,12 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>>>   		  struct sw_flow_key *key, const struct nlattr *attr,
>>>   		  bool last)
>>>   {
>>> -	struct nlattr *actions;
>>> +	const struct sample_arg *arg;
>>>   	struct nlattr *sample_arg;
>>>   	int rem = nla_len(attr);
>>> -	const struct sample_arg *arg;
>>> +	struct nlattr *actions;
>>>   	bool clone_flow_key;
>>> +	int ret;
>>>
>>>   	/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
>>>   	sample_arg = nla_data(attr);
>>> @@ -1051,9 +1081,20 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>>>   		return 0;
>>>   	}
>>>
>>> -	clone_flow_key = !arg->exec;
>>> -	return clone_execute(dp, skb, key, 0, actions, rem, last,
>>> -			     clone_flow_key);
>>> +	if (arg->flags & OVS_SAMPLE_ARG_FLAG_PSAMPLE) {
>>> +		ret = ovs_psample_packet(dp, key, arg, skb);
>>> +		if (ret)
>>> +			return ret;
>>> +	}
>>> +
>>> +	if (nla_ok(actions, rem)) {
>>> +		clone_flow_key = !(arg->flags & OVS_SAMPLE_ARG_FLAG_EXEC);
>>> +		ret = clone_execute(dp, skb, key, 0, actions, rem, last,
>>> +				    clone_flow_key);
>>> +	} else if (last) {
>>> +		ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
>>> +	}
>>> +	return ret;
>>>   }
>>>
>>>   /* When 'last' is true, clone() should always consume the 'skb'.
>>> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
>>> index f224d9bcea5e..1a348d3905fc 100644
>>> --- a/net/openvswitch/flow_netlink.c
>>> +++ b/net/openvswitch/flow_netlink.c
>>> @@ -2561,6 +2561,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>>>   				  u32 mpls_label_count, bool log,
>>>   				  u32 depth);
>>>
>>> +static int copy_action(const struct nlattr *from,
>>> +		       struct sw_flow_actions **sfa, bool log);
>>> +
>>>   static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>   				    const struct sw_flow_key *key,
>>>   				    struct sw_flow_actions **sfa,
>>> @@ -2569,10 +2572,10 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>   				    u32 depth)
>>>   {
>>>   	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
>>> -	const struct nlattr *probability, *actions;
>>> +	const struct nlattr *probability, *actions, *group, *cookie;
>>> +	struct sample_arg arg = {};
>>>   	const struct nlattr *a;
>>>   	int rem, start, err;
>>> -	struct sample_arg arg;
>>>
>>>   	memset(attrs, 0, sizeof(attrs));
>>>   	nla_for_each_nested(a, attr, rem) {
>>> @@ -2589,7 +2592,19 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>   		return -EINVAL;
>>>
>>>   	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
>>> -	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
>>> +	if (actions && (!nla_len(actions) || nla_len(actions) < NLA_HDRLEN))
>>> +		return -EINVAL;
>>> +
>>> +	group = attrs[OVS_SAMPLE_ATTR_PSAMPLE_GROUP];
>>> +	if (group && nla_len(group) != sizeof(u32))
>>> +		return -EINVAL;
>>> +
>>> +	cookie = attrs[OVS_SAMPLE_ATTR_PSAMPLE_COOKIE];
>>> +	if (cookie &&
>>> +	    (!group || nla_len(cookie) > OVS_PSAMPLE_COOKIE_MAX_SIZE))
>>> +		return -EINVAL;
>>> +
>>> +	if (!group && !actions)
>>>   		return -EINVAL;
>>>
>>>   	/* validation done, copy sample action. */
>>> @@ -2608,7 +2623,19 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>   	 * If the sample is the last action, it can always be excuted
>>>   	 * rather than deferred.
>>>   	 */
>>> -	arg.exec = last || !actions_may_change_flow(actions);
>>> +	if (actions && (last || !actions_may_change_flow(actions)))
>>> +		arg.flags |= OVS_SAMPLE_ARG_FLAG_EXEC;
>>> +
>>> +	if (group) {
>>> +		arg.flags |= OVS_SAMPLE_ARG_FLAG_PSAMPLE;
>>> +		arg.group_id = nla_get_u32(group);
>>> +	}
>>> +
>>> +	if (cookie) {
>>> +		memcpy(&arg.cookie[0], nla_data(cookie), nla_len(cookie));
>>> +		arg.cookie_len = nla_len(cookie);
>>> +	}
>>> +
>>>   	arg.probability = nla_get_u32(probability);
>>>
>>>   	err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
>>> @@ -2616,12 +2643,13 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>   	if (err)
>>>   		return err;
>>>
>>> -	err = __ovs_nla_copy_actions(net, actions, key, sfa,
>>> -				     eth_type, vlan_tci, mpls_label_count, log,
>>> -				     depth + 1);
>>> -
>>> -	if (err)
>>> -		return err;
>>> +	if (actions) {
>>> +		err = __ovs_nla_copy_actions(net, actions, key, sfa,
>>> +					     eth_type, vlan_tci,
>>> +					     mpls_label_count, log, depth + 1);
>>> +		if (err)
>>> +			return err;
>>> +	}
>>>
>>>   	add_nested_action_end(*sfa, start);
>>>
>>> @@ -3553,20 +3581,38 @@ static int sample_action_to_attr(const struct nlattr *attr,
>>>   		goto out;
>>>   	}
>>>
>>> -	ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
>>> -	if (!ac_start) {
>>> -		err = -EMSGSIZE;
>>> -		goto out;
>>> +	if (arg->flags & OVS_SAMPLE_ARG_FLAG_PSAMPLE) {
>>> +		if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PSAMPLE_GROUP,
>>> +				arg->group_id)) {
>>> +			err = -EMSGSIZE;
>>> +			goto out;
>>> +		}
>>> +
>>> +		if (arg->cookie_len &&
>>> +		    nla_put(skb, OVS_SAMPLE_ATTR_PSAMPLE_COOKIE,
>>> +			    arg->cookie_len, &arg->cookie[0])) {
>>> +			err = -EMSGSIZE;
>>> +			goto out;
>>> +		}
>>>   	}
>>>
>>> -	err = ovs_nla_put_actions(actions, rem, skb);
>>> +	if (nla_ok(actions, rem)) {
>>> +		ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
>>> +		if (!ac_start) {
>>> +			err = -EMSGSIZE;
>>> +			goto out;
>>> +		}
>>> +		err = ovs_nla_put_actions(actions, rem, skb);
>>> +	}
>>>
>>>   out:
>>>   	if (err) {
>>> -		nla_nest_cancel(skb, ac_start);
>>> +		if (ac_start)
>>> +			nla_nest_cancel(skb, ac_start);
>>>   		nla_nest_cancel(skb, start);
>>>   	} else {
>>> -		nla_nest_end(skb, ac_start);
>>> +		if (ac_start)
>>> +			nla_nest_end(skb, ac_start);
>>>   		nla_nest_end(skb, start);
>>>   	}
>>>
>>> -- 
>>> 2.44.0
>>
Aaron Conole May 8, 2024, 3:25 p.m. UTC | #5
Adrian Moreno <amorenoz@redhat.com> writes:

> On 5/3/24 11:43 AM, Eelco Chaudron wrote:
>> On 24 Apr 2024, at 15:50, Adrian Moreno wrote:
>> 
>>> Add support for psample sampling via two new attributes to the
>>> OVS_ACTION_ATTR_SAMPLE action.
>>>
>>> OVS_SAMPLE_ATTR_PSAMPLE_GROUP used to pass an integer psample group_id.
>>> OVS_SAMPLE_ATTR_PSAMPLE_COOKIE used to pass a variable-length binary
>>> cookie that will be forwared to psample.
>>>
>>> The maximum length of the user-defined cookie is set to 16, same as
>>> tc_cookie, to discourage using cookies that will not be offloadable.
>>>
>>> In order to simplify the internal processing of the action and given the
>>> maximum size of the cookie is relatively small, add both fields to the
>>> internal-only struct sample_arg.
>>>
>>> The presence of a group_id mandates that the action shall called the
>>> psample module to multicast the packet with such group_id and the
>>> user-provided cookie if present. This behavior is orthonogal to
>>> also executing the nested actions if present.
>>>
>>> Signed-off-by: Adrian Moreno <amorenoz@redhat.com>
>> This is not a full review yet. Just some comments, as I’m looking at
>> the user-space patch first and added similar comments.
>> I’ll do a proper review of this series once I’m done with user-space
>> part.
>> //Eelco
>> 
>>> ---
>>>   Documentation/netlink/specs/ovs_flow.yaml |  6 ++
>>>   include/uapi/linux/openvswitch.h          | 49 ++++++++++----
>>>   net/openvswitch/actions.c                 | 51 +++++++++++++--
>>>   net/openvswitch/flow_netlink.c            | 80 ++++++++++++++++++-----
>>>   4 files changed, 153 insertions(+), 33 deletions(-)
>>>
>>> diff --git a/Documentation/netlink/specs/ovs_flow.yaml b/Documentation/netlink/specs/ovs_flow.yaml
>>> index 4fdfc6b5cae9..5543c2937225 100644
>>> --- a/Documentation/netlink/specs/ovs_flow.yaml
>>> +++ b/Documentation/netlink/specs/ovs_flow.yaml
>>> @@ -825,6 +825,12 @@ attribute-sets:
>>>           name: actions
>>>           type: nest
>>>           nested-attributes: action-attrs
>>> +      -
>>> +        name: psample_group
>>> +        type: u32
>>> +      -
>>> +        name: psample_cookie
>>> +        type: binary
>>>     -
>>>       name: userspace-attrs
>>>       enum-name: ovs-userspace-attr
>>> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
>>> index efc82c318fa2..e9cd6f3a952d 100644
>>> --- a/include/uapi/linux/openvswitch.h
>>> +++ b/include/uapi/linux/openvswitch.h
>>> @@ -639,6 +639,7 @@ enum ovs_flow_attr {
>>>   #define OVS_UFID_F_OMIT_MASK     (1 << 1)
>>>   #define OVS_UFID_F_OMIT_ACTIONS  (1 << 2)
>>>
>>> +#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16
>>>   /**
>>>    * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
>>>    * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
>>> @@ -646,15 +647,27 @@ enum ovs_flow_attr {
>>>    * %UINT32_MAX samples all packets and intermediate values sample intermediate
>>>    * fractions of packets.
>>>    * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
>>> - * Actions are passed as nested attributes.
>>> + * Actions are passed as nested attributes. Optional if
>>> + * OVS_SAMPLE_ATTR_PSAMPLE_GROUP is set.
>>> + * @OVS_SAMPLE_ATTR_PSAMPLE_GROUP: A 32-bit number to be used as psample group.
>>> + * Optional if OVS_SAMPLE_ATTR_ACTIONS is set.
>>> + * @OVS_SAMPLE_ATTR_PSAMPLE_COOKIE: A variable-length binary cookie that, if
>>> + * provided, will be copied to the psample cookie.
>> As there is a limit of to the cookie should we mention it here?
>> 
>
> I thought OVS_PSAMPLE_COOKIE_MAX_SIZE was expressive enough but sure,
> we can also mention it here.
>
>>>    *
>>> - * Executes the specified actions with the given probability on a per-packet
>>> - * basis.
>>> + * Either OVS_SAMPLE_ATTR_PSAMPLE_GROUP or OVS_SAMPLE_ATTR_ACTIONS must be
>>> + * specified.
>>> + *
>>> + * Executes the specified actions and/or sends the packet to psample
>>> + * with the given probability on a per-packet basis.
>>>    */
>>>   enum ovs_sample_attr {
>>>   	OVS_SAMPLE_ATTR_UNSPEC,
>>> -	OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
>>> -	OVS_SAMPLE_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
>>> +	OVS_SAMPLE_ATTR_PROBABILITY,	/* u32 number */
>>> +	OVS_SAMPLE_ATTR_ACTIONS,	/* Nested OVS_ACTION_ATTR_
>> Missing * after OVS_ACTION_ATTR_
>
> As a matter of fact, adding an * makes checkpatch generate a warning
> IIRC. That's why I initially removed it. I can look at fixing
> checkpatch instead.

I think we can ignore the warning.  Alternatively, consider not changing
the comment spacing for the existing comments.

>> 
>>> +					 * attributes.
>>> +					 */
>>> +	OVS_SAMPLE_ATTR_PSAMPLE_GROUP,	/* u32 number */
>>> +	OVS_SAMPLE_ATTR_PSAMPLE_COOKIE,	/* binary */
>> As these are general sample options, I would not add the PSAMPLE
>> reference. Other data paths could use a different implementation. So
>> I guess OVS_SAMPLE_ATTR_GROUP_ID and OVS_SAMPLE_ATTR_COOKIE would be
>> enough.
>> 
>
> OK. But isn't the API already psample-ish? I mean that the group_id is
> something specific to psample that might not be present in other
> datapath implementation.
>
>
>>>   	__OVS_SAMPLE_ATTR_MAX,
>>>
>>>   #ifdef __KERNEL__
>>> @@ -665,13 +678,27 @@ enum ovs_sample_attr {
>>>   #define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1)
>>>
>>>   #ifdef __KERNEL__
>>> +
>>> +/* Definition for flags in struct sample_arg. */
>>> +enum {
>>> +	/* When set, actions in sample will not change the flows. */
>>> +	OVS_SAMPLE_ARG_FLAG_EXEC = 1 << 0,
>>> +	/* When set, the packet will be sent to psample. */
>>> +	OVS_SAMPLE_ARG_FLAG_PSAMPLE = 1 << 1,
>>> +};
>>> +
>>>   struct sample_arg {
>>> -	bool exec;                   /* When true, actions in sample will not
>>> -				      * change flow keys. False otherwise.
>>> -				      */
>>> -	u32  probability;            /* Same value as
>>> -				      * 'OVS_SAMPLE_ATTR_PROBABILITY'.
>>> -				      */
>> Not sure if you can actually do this, you are changing a structure
>> that is part of the UAPI. This change breaks backwards
>> compatibility.
>> 
>
> Hmmm... this the internal argument structure protected by #ifdef
> __KERNEL__. It is used in several actions to optimize the internal
> action handling (i.e: using a compact struct instead of a list of
> netlink attributes). I guess the reason for having it in this file is
> because the attribute enum is being reused, but I wouldn't think of
> this struct as part of the uAPI.
>
> If the (#ifdef __KERNEL__) does not exclude this struct from the uAPI
> I think we should move it (all of them actually) to some internal
> file.
>
>> 
>>> +	u16 flags;		/* Flags that modify the behavior of the
>>> +				 * action. See SAMPLE_ARG_FLAG_*.
>>> +				 */
>>> +	u32  probability;       /* Same value as
>>> +				 * 'OVS_SAMPLE_ATTR_PROBABILITY'.
>>> +				 */
>>> +	u32  group_id;		/* Same value as
>>> +				 * 'OVS_SAMPLE_ATTR_PSAMPLE_GROUP'.
>>> +				 */
>>> +	u8  cookie_len;		/* Length of psample cookie. */
>>> +	char cookie[OVS_PSAMPLE_COOKIE_MAX_SIZE]; /* psample cookie data. */
>> Would it make sense for the cookie also to be u8?
>> 
>
> Yes, probably.
>
>>>   };
>>>   #endif
>>>
>>> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
>>> index 6fcd7e2ca81f..eb3166986fd2 100644
>>> --- a/net/openvswitch/actions.c
>>> +++ b/net/openvswitch/actions.c
>>> @@ -24,6 +24,7 @@
>>>   #include <net/checksum.h>
>>>   #include <net/dsfield.h>
>>>   #include <net/mpls.h>
>>> +#include <net/psample.h>
>>>   #include <net/sctp/checksum.h>
>>>
>>>   #include "datapath.h"
>>> @@ -1025,6 +1026,34 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
>>>   	return 0;
>>>   }
>>>
>>> +static int ovs_psample_packet(struct datapath *dp, struct sw_flow_key *key,
>>> +			      const struct sample_arg *arg,
>>> +			      struct sk_buff *skb)
>>> +{
>>> +	struct psample_group psample_group = {};
>>> +	struct psample_metadata md = {};
>>> +	struct vport *input_vport;
>>> +	u32 rate;
>>> +
>>> +	psample_group.group_num = arg->group_id;
>>> +	psample_group.net = ovs_dp_get_net(dp);
>>> +
>>> +	input_vport = ovs_vport_rcu(dp, key->phy.in_port);
>>> +	if (!input_vport)
>>> +		input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
>>> +
>>> +	md.in_ifindex = input_vport->dev->ifindex;
>>> +	md.user_cookie = arg->cookie_len ? &arg->cookie[0] : NULL;
>>> +	md.user_cookie_len = arg->cookie_len;
>>> +	md.trunc_size = skb->len;
>>> +
>>> +	rate = arg->probability ? U32_MAX / arg->probability : 0;
>>> +
>>> +	psample_sample_packet(&psample_group, skb, rate, &md);
>> Does this mean now the ovs module, now is dependent on the presence
>> of psample? I think we should only support sampling to psample if
>> the module exists, else we should return an error. There might be
>> distributions not including psample by default.
>
> Agree. I'll add some compile-time checks the same way we do with nf_nat.
>
>> 
>>> +
>>> +	return 0;
>>> +}
>>> +
>>>   /* When 'last' is true, sample() should always consume the 'skb'.
>>>    * Otherwise, sample() should keep 'skb' intact regardless what
>>>    * actions are executed within sample().
>>> @@ -1033,11 +1062,12 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>>>   		  struct sw_flow_key *key, const struct nlattr *attr,
>>>   		  bool last)
>>>   {
>>> -	struct nlattr *actions;
>>> +	const struct sample_arg *arg;
>>>   	struct nlattr *sample_arg;
>>>   	int rem = nla_len(attr);
>>> -	const struct sample_arg *arg;
>>> +	struct nlattr *actions;
>>>   	bool clone_flow_key;
>>> +	int ret;
>>>
>>>   	/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
>>>   	sample_arg = nla_data(attr);
>>> @@ -1051,9 +1081,20 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>>>   		return 0;
>>>   	}
>>>
>>> -	clone_flow_key = !arg->exec;
>>> -	return clone_execute(dp, skb, key, 0, actions, rem, last,
>>> -			     clone_flow_key);
>>> +	if (arg->flags & OVS_SAMPLE_ARG_FLAG_PSAMPLE) {
>>> +		ret = ovs_psample_packet(dp, key, arg, skb);
>>> +		if (ret)
>>> +			return ret;
>>> +	}
>>> +
>>> +	if (nla_ok(actions, rem)) {
>>> +		clone_flow_key = !(arg->flags & OVS_SAMPLE_ARG_FLAG_EXEC);
>>> +		ret = clone_execute(dp, skb, key, 0, actions, rem, last,
>>> +				    clone_flow_key);
>>> +	} else if (last) {
>>> +		ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
>>> +	}
>>> +	return ret;
>>>   }
>>>
>>>   /* When 'last' is true, clone() should always consume the 'skb'.
>>> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
>>> index f224d9bcea5e..1a348d3905fc 100644
>>> --- a/net/openvswitch/flow_netlink.c
>>> +++ b/net/openvswitch/flow_netlink.c
>>> @@ -2561,6 +2561,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>>>   				  u32 mpls_label_count, bool log,
>>>   				  u32 depth);
>>>
>>> +static int copy_action(const struct nlattr *from,
>>> +		       struct sw_flow_actions **sfa, bool log);
>>> +
>>>   static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>   				    const struct sw_flow_key *key,
>>>   				    struct sw_flow_actions **sfa,
>>> @@ -2569,10 +2572,10 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>   				    u32 depth)
>>>   {
>>>   	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
>>> -	const struct nlattr *probability, *actions;
>>> +	const struct nlattr *probability, *actions, *group, *cookie;
>>> +	struct sample_arg arg = {};
>>>   	const struct nlattr *a;
>>>   	int rem, start, err;
>>> -	struct sample_arg arg;
>>>
>>>   	memset(attrs, 0, sizeof(attrs));
>>>   	nla_for_each_nested(a, attr, rem) {
>>> @@ -2589,7 +2592,19 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>   		return -EINVAL;
>>>
>>>   	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
>>> -	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
>>> +	if (actions && (!nla_len(actions) || nla_len(actions) < NLA_HDRLEN))
>>> +		return -EINVAL;
>>> +
>>> +	group = attrs[OVS_SAMPLE_ATTR_PSAMPLE_GROUP];
>>> +	if (group && nla_len(group) != sizeof(u32))
>>> +		return -EINVAL;
>>> +
>>> +	cookie = attrs[OVS_SAMPLE_ATTR_PSAMPLE_COOKIE];
>>> +	if (cookie &&
>>> +	    (!group || nla_len(cookie) > OVS_PSAMPLE_COOKIE_MAX_SIZE))
>>> +		return -EINVAL;
>>> +
>>> +	if (!group && !actions)
>>>   		return -EINVAL;
>>>
>>>   	/* validation done, copy sample action. */
>>> @@ -2608,7 +2623,19 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>   	 * If the sample is the last action, it can always be excuted
>>>   	 * rather than deferred.
>>>   	 */
>>> -	arg.exec = last || !actions_may_change_flow(actions);
>>> +	if (actions && (last || !actions_may_change_flow(actions)))
>>> +		arg.flags |= OVS_SAMPLE_ARG_FLAG_EXEC;
>>> +
>>> +	if (group) {
>>> +		arg.flags |= OVS_SAMPLE_ARG_FLAG_PSAMPLE;
>>> +		arg.group_id = nla_get_u32(group);
>>> +	}
>>> +
>>> +	if (cookie) {
>>> +		memcpy(&arg.cookie[0], nla_data(cookie), nla_len(cookie));
>>> +		arg.cookie_len = nla_len(cookie);
>>> +	}
>>> +
>>>   	arg.probability = nla_get_u32(probability);
>>>
>>>   	err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
>>> @@ -2616,12 +2643,13 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>   	if (err)
>>>   		return err;
>>>
>>> -	err = __ovs_nla_copy_actions(net, actions, key, sfa,
>>> -				     eth_type, vlan_tci, mpls_label_count, log,
>>> -				     depth + 1);
>>> -
>>> -	if (err)
>>> -		return err;
>>> +	if (actions) {
>>> +		err = __ovs_nla_copy_actions(net, actions, key, sfa,
>>> +					     eth_type, vlan_tci,
>>> +					     mpls_label_count, log, depth + 1);
>>> +		if (err)
>>> +			return err;
>>> +	}
>>>
>>>   	add_nested_action_end(*sfa, start);
>>>
>>> @@ -3553,20 +3581,38 @@ static int sample_action_to_attr(const struct nlattr *attr,
>>>   		goto out;
>>>   	}
>>>
>>> -	ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
>>> -	if (!ac_start) {
>>> -		err = -EMSGSIZE;
>>> -		goto out;
>>> +	if (arg->flags & OVS_SAMPLE_ARG_FLAG_PSAMPLE) {
>>> +		if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PSAMPLE_GROUP,
>>> +				arg->group_id)) {
>>> +			err = -EMSGSIZE;
>>> +			goto out;
>>> +		}
>>> +
>>> +		if (arg->cookie_len &&
>>> +		    nla_put(skb, OVS_SAMPLE_ATTR_PSAMPLE_COOKIE,
>>> +			    arg->cookie_len, &arg->cookie[0])) {
>>> +			err = -EMSGSIZE;
>>> +			goto out;
>>> +		}
>>>   	}
>>>
>>> -	err = ovs_nla_put_actions(actions, rem, skb);
>>> +	if (nla_ok(actions, rem)) {
>>> +		ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
>>> +		if (!ac_start) {
>>> +			err = -EMSGSIZE;
>>> +			goto out;
>>> +		}
>>> +		err = ovs_nla_put_actions(actions, rem, skb);
>>> +	}
>>>
>>>   out:
>>>   	if (err) {
>>> -		nla_nest_cancel(skb, ac_start);
>>> +		if (ac_start)
>>> +			nla_nest_cancel(skb, ac_start);
>>>   		nla_nest_cancel(skb, start);
>>>   	} else {
>>> -		nla_nest_end(skb, ac_start);
>>> +		if (ac_start)
>>> +			nla_nest_end(skb, ac_start);
>>>   		nla_nest_end(skb, start);
>>>   	}
>>>
>>> -- 2.44.0
>>
diff mbox series

Patch

diff --git a/Documentation/netlink/specs/ovs_flow.yaml b/Documentation/netlink/specs/ovs_flow.yaml
index 4fdfc6b5cae9..5543c2937225 100644
--- a/Documentation/netlink/specs/ovs_flow.yaml
+++ b/Documentation/netlink/specs/ovs_flow.yaml
@@ -825,6 +825,12 @@  attribute-sets:
         name: actions
         type: nest
         nested-attributes: action-attrs
+      -
+        name: psample_group
+        type: u32
+      -
+        name: psample_cookie
+        type: binary
   -
     name: userspace-attrs
     enum-name: ovs-userspace-attr
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index efc82c318fa2..e9cd6f3a952d 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -639,6 +639,7 @@  enum ovs_flow_attr {
 #define OVS_UFID_F_OMIT_MASK     (1 << 1)
 #define OVS_UFID_F_OMIT_ACTIONS  (1 << 2)
 
+#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16
 /**
  * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
  * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
@@ -646,15 +647,27 @@  enum ovs_flow_attr {
  * %UINT32_MAX samples all packets and intermediate values sample intermediate
  * fractions of packets.
  * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
- * Actions are passed as nested attributes.
+ * Actions are passed as nested attributes. Optional if
+ * OVS_SAMPLE_ATTR_PSAMPLE_GROUP is set.
+ * @OVS_SAMPLE_ATTR_PSAMPLE_GROUP: A 32-bit number to be used as psample group.
+ * Optional if OVS_SAMPLE_ATTR_ACTIONS is set.
+ * @OVS_SAMPLE_ATTR_PSAMPLE_COOKIE: A variable-length binary cookie that, if
+ * provided, will be copied to the psample cookie.
  *
- * Executes the specified actions with the given probability on a per-packet
- * basis.
+ * Either OVS_SAMPLE_ATTR_PSAMPLE_GROUP or OVS_SAMPLE_ATTR_ACTIONS must be
+ * specified.
+ *
+ * Executes the specified actions and/or sends the packet to psample
+ * with the given probability on a per-packet basis.
  */
 enum ovs_sample_attr {
 	OVS_SAMPLE_ATTR_UNSPEC,
-	OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
-	OVS_SAMPLE_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
+	OVS_SAMPLE_ATTR_PROBABILITY,	/* u32 number */
+	OVS_SAMPLE_ATTR_ACTIONS,	/* Nested OVS_ACTION_ATTR_
+					 * attributes.
+					 */
+	OVS_SAMPLE_ATTR_PSAMPLE_GROUP,	/* u32 number */
+	OVS_SAMPLE_ATTR_PSAMPLE_COOKIE,	/* binary */
 	__OVS_SAMPLE_ATTR_MAX,
 
 #ifdef __KERNEL__
@@ -665,13 +678,27 @@  enum ovs_sample_attr {
 #define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1)
 
 #ifdef __KERNEL__
+
+/* Definition for flags in struct sample_arg. */
+enum {
+	/* When set, actions in sample will not change the flows. */
+	OVS_SAMPLE_ARG_FLAG_EXEC = 1 << 0,
+	/* When set, the packet will be sent to psample. */
+	OVS_SAMPLE_ARG_FLAG_PSAMPLE = 1 << 1,
+};
+
 struct sample_arg {
-	bool exec;                   /* When true, actions in sample will not
-				      * change flow keys. False otherwise.
-				      */
-	u32  probability;            /* Same value as
-				      * 'OVS_SAMPLE_ATTR_PROBABILITY'.
-				      */
+	u16 flags;		/* Flags that modify the behavior of the
+				 * action. See SAMPLE_ARG_FLAG_*.
+				 */
+	u32  probability;       /* Same value as
+				 * 'OVS_SAMPLE_ATTR_PROBABILITY'.
+				 */
+	u32  group_id;		/* Same value as
+				 * 'OVS_SAMPLE_ATTR_PSAMPLE_GROUP'.
+				 */
+	u8  cookie_len;		/* Length of psample cookie. */
+	char cookie[OVS_PSAMPLE_COOKIE_MAX_SIZE]; /* psample cookie data. */
 };
 #endif
 
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 6fcd7e2ca81f..eb3166986fd2 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -24,6 +24,7 @@ 
 #include <net/checksum.h>
 #include <net/dsfield.h>
 #include <net/mpls.h>
+#include <net/psample.h>
 #include <net/sctp/checksum.h>
 
 #include "datapath.h"
@@ -1025,6 +1026,34 @@  static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
 	return 0;
 }
 
+static int ovs_psample_packet(struct datapath *dp, struct sw_flow_key *key,
+			      const struct sample_arg *arg,
+			      struct sk_buff *skb)
+{
+	struct psample_group psample_group = {};
+	struct psample_metadata md = {};
+	struct vport *input_vport;
+	u32 rate;
+
+	psample_group.group_num = arg->group_id;
+	psample_group.net = ovs_dp_get_net(dp);
+
+	input_vport = ovs_vport_rcu(dp, key->phy.in_port);
+	if (!input_vport)
+		input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
+
+	md.in_ifindex = input_vport->dev->ifindex;
+	md.user_cookie = arg->cookie_len ? &arg->cookie[0] : NULL;
+	md.user_cookie_len = arg->cookie_len;
+	md.trunc_size = skb->len;
+
+	rate = arg->probability ? U32_MAX / arg->probability : 0;
+
+	psample_sample_packet(&psample_group, skb, rate, &md);
+
+	return 0;
+}
+
 /* When 'last' is true, sample() should always consume the 'skb'.
  * Otherwise, sample() should keep 'skb' intact regardless what
  * actions are executed within sample().
@@ -1033,11 +1062,12 @@  static int sample(struct datapath *dp, struct sk_buff *skb,
 		  struct sw_flow_key *key, const struct nlattr *attr,
 		  bool last)
 {
-	struct nlattr *actions;
+	const struct sample_arg *arg;
 	struct nlattr *sample_arg;
 	int rem = nla_len(attr);
-	const struct sample_arg *arg;
+	struct nlattr *actions;
 	bool clone_flow_key;
+	int ret;
 
 	/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
 	sample_arg = nla_data(attr);
@@ -1051,9 +1081,20 @@  static int sample(struct datapath *dp, struct sk_buff *skb,
 		return 0;
 	}
 
-	clone_flow_key = !arg->exec;
-	return clone_execute(dp, skb, key, 0, actions, rem, last,
-			     clone_flow_key);
+	if (arg->flags & OVS_SAMPLE_ARG_FLAG_PSAMPLE) {
+		ret = ovs_psample_packet(dp, key, arg, skb);
+		if (ret)
+			return ret;
+	}
+
+	if (nla_ok(actions, rem)) {
+		clone_flow_key = !(arg->flags & OVS_SAMPLE_ARG_FLAG_EXEC);
+		ret = clone_execute(dp, skb, key, 0, actions, rem, last,
+				    clone_flow_key);
+	} else if (last) {
+		ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
+	}
+	return ret;
 }
 
 /* When 'last' is true, clone() should always consume the 'skb'.
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index f224d9bcea5e..1a348d3905fc 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2561,6 +2561,9 @@  static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 				  u32 mpls_label_count, bool log,
 				  u32 depth);
 
+static int copy_action(const struct nlattr *from,
+		       struct sw_flow_actions **sfa, bool log);
+
 static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
 				    const struct sw_flow_key *key,
 				    struct sw_flow_actions **sfa,
@@ -2569,10 +2572,10 @@  static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
 				    u32 depth)
 {
 	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
-	const struct nlattr *probability, *actions;
+	const struct nlattr *probability, *actions, *group, *cookie;
+	struct sample_arg arg = {};
 	const struct nlattr *a;
 	int rem, start, err;
-	struct sample_arg arg;
 
 	memset(attrs, 0, sizeof(attrs));
 	nla_for_each_nested(a, attr, rem) {
@@ -2589,7 +2592,19 @@  static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
 		return -EINVAL;
 
 	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
-	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
+	if (actions && (!nla_len(actions) || nla_len(actions) < NLA_HDRLEN))
+		return -EINVAL;
+
+	group = attrs[OVS_SAMPLE_ATTR_PSAMPLE_GROUP];
+	if (group && nla_len(group) != sizeof(u32))
+		return -EINVAL;
+
+	cookie = attrs[OVS_SAMPLE_ATTR_PSAMPLE_COOKIE];
+	if (cookie &&
+	    (!group || nla_len(cookie) > OVS_PSAMPLE_COOKIE_MAX_SIZE))
+		return -EINVAL;
+
+	if (!group && !actions)
 		return -EINVAL;
 
 	/* validation done, copy sample action. */
@@ -2608,7 +2623,19 @@  static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
 	 * If the sample is the last action, it can always be excuted
 	 * rather than deferred.
 	 */
-	arg.exec = last || !actions_may_change_flow(actions);
+	if (actions && (last || !actions_may_change_flow(actions)))
+		arg.flags |= OVS_SAMPLE_ARG_FLAG_EXEC;
+
+	if (group) {
+		arg.flags |= OVS_SAMPLE_ARG_FLAG_PSAMPLE;
+		arg.group_id = nla_get_u32(group);
+	}
+
+	if (cookie) {
+		memcpy(&arg.cookie[0], nla_data(cookie), nla_len(cookie));
+		arg.cookie_len = nla_len(cookie);
+	}
+
 	arg.probability = nla_get_u32(probability);
 
 	err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
@@ -2616,12 +2643,13 @@  static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
 	if (err)
 		return err;
 
-	err = __ovs_nla_copy_actions(net, actions, key, sfa,
-				     eth_type, vlan_tci, mpls_label_count, log,
-				     depth + 1);
-
-	if (err)
-		return err;
+	if (actions) {
+		err = __ovs_nla_copy_actions(net, actions, key, sfa,
+					     eth_type, vlan_tci,
+					     mpls_label_count, log, depth + 1);
+		if (err)
+			return err;
+	}
 
 	add_nested_action_end(*sfa, start);
 
@@ -3553,20 +3581,38 @@  static int sample_action_to_attr(const struct nlattr *attr,
 		goto out;
 	}
 
-	ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
-	if (!ac_start) {
-		err = -EMSGSIZE;
-		goto out;
+	if (arg->flags & OVS_SAMPLE_ARG_FLAG_PSAMPLE) {
+		if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PSAMPLE_GROUP,
+				arg->group_id)) {
+			err = -EMSGSIZE;
+			goto out;
+		}
+
+		if (arg->cookie_len &&
+		    nla_put(skb, OVS_SAMPLE_ATTR_PSAMPLE_COOKIE,
+			    arg->cookie_len, &arg->cookie[0])) {
+			err = -EMSGSIZE;
+			goto out;
+		}
 	}
 
-	err = ovs_nla_put_actions(actions, rem, skb);
+	if (nla_ok(actions, rem)) {
+		ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
+		if (!ac_start) {
+			err = -EMSGSIZE;
+			goto out;
+		}
+		err = ovs_nla_put_actions(actions, rem, skb);
+	}
 
 out:
 	if (err) {
-		nla_nest_cancel(skb, ac_start);
+		if (ac_start)
+			nla_nest_cancel(skb, ac_start);
 		nla_nest_cancel(skb, start);
 	} else {
-		nla_nest_end(skb, ac_start);
+		if (ac_start)
+			nla_nest_end(skb, ac_start);
 		nla_nest_end(skb, start);
 	}