diff mbox series

[net-next,v2,8/8] netdevsim: Add support for add and delete PCI SF port

Message ID 20200917172020.26484-9-parav@nvidia.com
State Changes Requested
Delegated to: David Miller
Headers show
Series devlink: Add SF add/delete devlink ops | expand

Commit Message

Parav Pandit Sept. 17, 2020, 5:20 p.m. UTC
Simulate PCI SF ports. Allow user to create one or more PCI SF ports.

Examples:

Create a PCI PF and PCI SF port.
$ devlink port add netdevsim/netdevsim10/10 flavour pcipf pfnum 0
$ devlink port add netdevsim/netdevsim10/11 flavour pcisf pfnum 0 sfnum 44
$ devlink port show netdevsim/netdevsim10/11
netdevsim/netdevsim10/11: type eth netdev eni10npf0sf44 flavour pcisf controller 0 pfnum 0 sfnum 44 external true splittable false
  function:
    hw_addr 00:00:00:00:00:00 state inactive

$ devlink port function set netdevsim/netdevsim10/11 hw_addr 00:11:22:33:44:55 state active

$ devlink port show netdevsim/netdevsim10/11 -jp
{
    "port": {
        "netdevsim/netdevsim10/11": {
            "type": "eth",
            "netdev": "eni10npf0sf44",
            "flavour": "pcisf",
            "controller": 0,
            "pfnum": 0,
            "sfnum": 44,
            "external": true,
            "splittable": false,
            "function": {
                "hw_addr": "00:11:22:33:44:55",
                "state": "active"
            }
        }
    }
}

Delete newly added devlink port
$ devlink port add netdevsim/netdevsim10/11

Add devlink port of flavour 'pcisf' where port index and sfnum are
auto assigned by driver.
$ devlink port add netdevsim/netdevsim10 flavour pcisf controller 0 pfnum 0

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
---
 drivers/net/netdevsim/netdevsim.h     |  1 +
 drivers/net/netdevsim/port_function.c | 95 +++++++++++++++++++++++++--
 2 files changed, 92 insertions(+), 4 deletions(-)

Comments

David Ahern Sept. 17, 2020, 8:31 p.m. UTC | #1
On 9/17/20 11:20 AM, Parav Pandit wrote:
> Simulate PCI SF ports. Allow user to create one or more PCI SF ports.
> 
> Examples:
> 
> Create a PCI PF and PCI SF port.
> $ devlink port add netdevsim/netdevsim10/10 flavour pcipf pfnum 0
> $ devlink port add netdevsim/netdevsim10/11 flavour pcisf pfnum 0 sfnum 44
> $ devlink port show netdevsim/netdevsim10/11
> netdevsim/netdevsim10/11: type eth netdev eni10npf0sf44 flavour pcisf controller 0 pfnum 0 sfnum 44 external true splittable false
>   function:
>     hw_addr 00:00:00:00:00:00 state inactive
> 
> $ devlink port function set netdevsim/netdevsim10/11 hw_addr 00:11:22:33:44:55 state active
> 
> $ devlink port show netdevsim/netdevsim10/11 -jp
> {
>     "port": {
>         "netdevsim/netdevsim10/11": {
>             "type": "eth",
>             "netdev": "eni10npf0sf44",

I could be missing something, but it does not seem like this patch
creates the netdevice for the subfunction.


>             "flavour": "pcisf",
>             "controller": 0,
>             "pfnum": 0,
>             "sfnum": 44,
>             "external": true,
>             "splittable": false,
>             "function": {
>                 "hw_addr": "00:11:22:33:44:55",
>                 "state": "active"
>             }
>         }
>     }
> }
> 
> Delete newly added devlink port
> $ devlink port add netdevsim/netdevsim10/11
> 
> Add devlink port of flavour 'pcisf' where port index and sfnum are
> auto assigned by driver.
> $ devlink port add netdevsim/netdevsim10 flavour pcisf controller 0 pfnum 0
> 
> Signed-off-by: Parav Pandit <parav@nvidia.com>
> Reviewed-by: Jiri Pirko <jiri@nvidia.com>
> ---
>  drivers/net/netdevsim/netdevsim.h     |  1 +
>  drivers/net/netdevsim/port_function.c | 95 +++++++++++++++++++++++++--
>  2 files changed, 92 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
> index 0ea9705eda38..c70782e444d5 100644
> --- a/drivers/net/netdevsim/netdevsim.h
> +++ b/drivers/net/netdevsim/netdevsim.h
> @@ -222,6 +222,7 @@ struct nsim_dev {
>  		struct list_head head;
>  		struct ida ida;
>  		struct ida pfnum_ida;
> +		struct ida sfnum_ida;
>  	} port_functions;
>  };
>  
> diff --git a/drivers/net/netdevsim/port_function.c b/drivers/net/netdevsim/port_function.c
> index 99581d3d15fe..e1812acd55b4 100644
> --- a/drivers/net/netdevsim/port_function.c
> +++ b/drivers/net/netdevsim/port_function.c
> @@ -13,10 +13,12 @@ struct nsim_port_function {
>  	unsigned int port_index;
>  	enum devlink_port_flavour flavour;
>  	u32 controller;
> +	u32 sfnum;
>  	u16 pfnum;
>  	struct nsim_port_function *pf_port; /* Valid only for SF port */
>  	u8 hw_addr[ETH_ALEN];
>  	u8 state; /* enum devlink_port_function_state */
> +	int refcount; /* Counts how many sf ports are bound attached to this pf port. */
>  };
>  
>  void nsim_dev_port_function_init(struct nsim_dev *nsim_dev)
> @@ -25,10 +27,13 @@ void nsim_dev_port_function_init(struct nsim_dev *nsim_dev)
>  	INIT_LIST_HEAD(&nsim_dev->port_functions.head);
>  	ida_init(&nsim_dev->port_functions.ida);
>  	ida_init(&nsim_dev->port_functions.pfnum_ida);
> +	ida_init(&nsim_dev->port_functions.sfnum_ida);
>  }
>  
>  void nsim_dev_port_function_exit(struct nsim_dev *nsim_dev)
>  {
> +	WARN_ON(!ida_is_empty(&nsim_dev->port_functions.sfnum_ida));
> +	ida_destroy(&nsim_dev->port_functions.sfnum_ida);
>  	WARN_ON(!ida_is_empty(&nsim_dev->port_functions.pfnum_ida));
>  	ida_destroy(&nsim_dev->port_functions.pfnum_ida);
>  	WARN_ON(!ida_is_empty(&nsim_dev->port_functions.ida));
> @@ -119,9 +124,24 @@ nsim_devlink_port_function_alloc(struct nsim_dev *dev, const struct devlink_port
>  			goto fn_ida_err;
>  		port->pfnum = ret;
>  		break;
> +	case DEVLINK_PORT_FLAVOUR_PCI_SF:
> +		if (attrs->sfnum_valid)
> +			ret = ida_alloc_range(&dev->port_functions.sfnum_ida, attrs->sfnum,
> +					      attrs->sfnum, GFP_KERNEL);
> +		else
> +			ret = ida_alloc(&dev->port_functions.sfnum_ida, GFP_KERNEL);
> +		if (ret < 0)
> +			goto fn_ida_err;
> +		port->sfnum = ret;
> +		port->pfnum = attrs->pfnum;
> +		break;
>  	default:
>  		break;
>  	}
> +	/* refcount_t is not needed as port is protected by port_functions.mutex.
> +	 * This count is to keep track of how many SF ports are attached a PF port.
> +	 */
> +	port->refcount = 1;
>  	return port;
>  
>  fn_ida_err:
> @@ -137,6 +157,9 @@ static void nsim_devlink_port_function_free(struct nsim_dev *dev, struct nsim_po
>  	case DEVLINK_PORT_FLAVOUR_PCI_PF:
>  		ida_simple_remove(&dev->port_functions.pfnum_ida, port->pfnum);
>  		break;
> +	case DEVLINK_PORT_FLAVOUR_PCI_SF:
> +		ida_simple_remove(&dev->port_functions.sfnum_ida, port->sfnum);
> +		break;
>  	default:
>  		break;
>  	}
> @@ -170,6 +193,11 @@ nsim_dev_port_port_exists(struct nsim_dev *nsim_dev, const struct devlink_port_n
>  		if (attrs->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF &&
>  		    tmp->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF && tmp->pfnum == attrs->pfnum)
>  			return true;
> +
> +		if (attrs->flavour == DEVLINK_PORT_FLAVOUR_PCI_SF &&
> +		    tmp->flavour == DEVLINK_PORT_FLAVOUR_PCI_SF &&
> +		    tmp->sfnum == attrs->sfnum && tmp->pfnum == attrs->pfnum)
> +			return true;
>  	}
>  	return false;
>  }
> @@ -183,21 +211,71 @@ nsim_dev_devlink_port_index_lookup(const struct nsim_dev *nsim_dev, unsigned int
>  	list_for_each_entry(port, &nsim_dev->port_functions.head, list) {
>  		if (port->port_index != port_index)
>  			continue;
> +		if (port->refcount > 1) {
> +			NL_SET_ERR_MSG_MOD(extack, "Port is in use");
> +			return ERR_PTR(-EBUSY);
> +		}
>  		return port;
>  	}
>  	NL_SET_ERR_MSG_MOD(extack, "User created port not found");
>  	return ERR_PTR(-ENOENT);
>  }
>  
> +static struct nsim_port_function *
> +pf_port_get(struct nsim_dev *nsim_dev, struct nsim_port_function *port)
> +{
> +	struct nsim_port_function *tmp;
> +
> +	/* PF port addition doesn't need a parent. */
> +	if (port->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF)
> +		return NULL;
> +
> +	list_for_each_entry(tmp, &nsim_dev->port_functions.head, list) {
> +		if (tmp->flavour != DEVLINK_PORT_FLAVOUR_PCI_PF || tmp->pfnum != port->pfnum)
> +			continue;
> +
> +		if (tmp->refcount + 1 == INT_MAX)
> +			return ERR_PTR(-ENOSPC);
> +
> +		port->pf_port = tmp;
> +		tmp->refcount++;
> +		return tmp;
> +	}
> +	return ERR_PTR(-ENOENT);
> +}
> +
> +static void pf_port_put(struct nsim_port_function *port)
> +{
> +	if (port->pf_port) {
> +		port->pf_port->refcount--;
> +		WARN_ON(port->pf_port->refcount < 0);
> +	}
> +	port->refcount--;
> +	WARN_ON(port->refcount != 0);
> +}
> +
>  static int nsim_devlink_port_function_add(struct devlink *devlink, struct nsim_dev *nsim_dev,
>  					  struct nsim_port_function *port,
>  					  struct netlink_ext_ack *extack)
>  {
> +	struct nsim_port_function *pf_port;
>  	int err;
>  
> -	list_add(&port->list, &nsim_dev->port_functions.head);
> +	/* Keep all PF ports at the start, so that when driver is unloaded
> +	 * All SF ports from the end of the list can be removed first.
> +	 */
> +	if (port->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF)
> +		list_add(&port->list, &nsim_dev->port_functions.head);
> +	else
> +		list_add_tail(&port->list, &nsim_dev->port_functions.head);
> +
> +	pf_port = pf_port_get(nsim_dev, port);
> +	if (IS_ERR(pf_port)) {
> +		NL_SET_ERR_MSG_MOD(extack, "Fail to get pf port");
> +		err = PTR_ERR(pf_port);
> +		goto pf_err;
> +	}
>  
> -	port->state = DEVLINK_PORT_FUNCTION_STATE_INACTIVE;
>  	err = devlink_port_register(devlink, &port->dl_port, port->port_index);
>  	if (err)
>  		goto reg_err;
> @@ -213,6 +291,8 @@ static int nsim_devlink_port_function_add(struct devlink *devlink, struct nsim_d
>  	devlink_port_type_clear(&port->dl_port);
>  	devlink_port_unregister(&port->dl_port);
>  reg_err:
> +	pf_port_put(port);
> +pf_err:
>  	list_del(&port->list);
>  	return err;
>  }
> @@ -224,12 +304,14 @@ static void nsim_devlink_port_function_del(struct nsim_dev *nsim_dev,
>  	unregister_netdev(port->netdev);
>  	devlink_port_unregister(&port->dl_port);
>  	list_del(&port->list);
> +	pf_port_put(port);
>  }
>  
>  static bool nsim_dev_port_flavour_supported(const struct nsim_dev *nsim_dev,
>  					    const struct devlink_port_new_attrs *attrs)
>  {
> -	return attrs->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF;
> +	return attrs->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF ||
> +	       attrs->flavour == DEVLINK_PORT_FLAVOUR_PCI_SF;
>  }
>  
>  int nsim_dev_devlink_port_new(struct devlink *devlink, const struct devlink_port_new_attrs *attrs,
> @@ -266,7 +348,11 @@ int nsim_dev_devlink_port_new(struct devlink *devlink, const struct devlink_port
>  	       nsim_dev->switch_id.id_len);
>  	port->dl_port.attrs.switch_id.id_len = nsim_dev->switch_id.id_len;
>  
> -	devlink_port_attrs_pci_pf_set(&port->dl_port, port->controller, port->pfnum, false);
> +	if (attrs->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF)
> +		devlink_port_attrs_pci_pf_set(&port->dl_port, port->controller, port->pfnum, false);
> +	else
> +		devlink_port_attrs_pci_sf_set(&port->dl_port, port->controller, port->pfnum,
> +					      port->sfnum, false);
>  
>  	err = nsim_devlink_port_function_add(devlink, nsim_dev, port, extack);
>  	if (err)
> @@ -333,6 +419,7 @@ void nsim_dev_port_function_disable(struct nsim_dev *nsim_dev)
>  	 * ports.
>  	 */
>  
> +	/* Remove SF ports first, followed by PF ports. */
>  	list_for_each_entry_safe_reverse(port, tmp, &nsim_dev->port_functions.head, list) {
>  		nsim_devlink_port_function_del(nsim_dev, port);
>  		nsim_devlink_port_function_free(nsim_dev, port);
>
Parav Pandit Sept. 18, 2020, 3:29 a.m. UTC | #2
> From: David Ahern <dsahern@gmail.com>
> Sent: Friday, September 18, 2020 2:01 AM
> 
> On 9/17/20 11:20 AM, Parav Pandit wrote:
> > Simulate PCI SF ports. Allow user to create one or more PCI SF ports.
> >
> > Examples:
> >
> > Create a PCI PF and PCI SF port.
> > $ devlink port add netdevsim/netdevsim10/10 flavour pcipf pfnum 0 $
> > devlink port add netdevsim/netdevsim10/11 flavour pcisf pfnum 0 sfnum
> > 44 $ devlink port show netdevsim/netdevsim10/11
> > netdevsim/netdevsim10/11: type eth netdev eni10npf0sf44 flavour pcisf
> controller 0 pfnum 0 sfnum 44 external true splittable false
> >   function:
> >     hw_addr 00:00:00:00:00:00 state inactive
> >
> > $ devlink port function set netdevsim/netdevsim10/11 hw_addr
> > 00:11:22:33:44:55 state active
> >
> > $ devlink port show netdevsim/netdevsim10/11 -jp {
> >     "port": {
> >         "netdevsim/netdevsim10/11": {
> >             "type": "eth",
> >             "netdev": "eni10npf0sf44",
> 
> I could be missing something, but it does not seem like this patch creates the
> netdevice for the subfunction.
>
The sf port created here is the eswitch port with a valid switch id similar to PF and physical port.
So the netdev created is the representor netdevice.
It is created uniformly for subfunction and pf port flavours.

This series enables user to add PCI PF and subfunction ports.
PF port addition (and its representor netdev) is done in patch 5 [1].
This patch for SF utilizes the same ' struct nsim_port_function' for PF and SF.
Only difference between them is flavour.
[1] https://lore.kernel.org/netdev/20200917172020.26484-6-parav@nvidia.com/

 > 
> >             "flavour": "pcisf",
> >             "controller": 0,
> >             "pfnum": 0,
> >             "sfnum": 44,
> >             "external": true,
> >             "splittable": false,
> >             "function": {
> >                 "hw_addr": "00:11:22:33:44:55",
> >                 "state": "active"
> >             }
> >         }
> >     }
> > }
> >
> > Delete newly added devlink port
> > $ devlink port add netdevsim/netdevsim10/11
> >
> > Add devlink port of flavour 'pcisf' where port index and sfnum are
> > auto assigned by driver.
> > $ devlink port add netdevsim/netdevsim10 flavour pcisf controller 0
> > pfnum 0
David Ahern Sept. 18, 2020, 3:38 a.m. UTC | #3
On 9/17/20 9:29 PM, Parav Pandit wrote:
>>> Examples:
>>>
>>> Create a PCI PF and PCI SF port.
>>> $ devlink port add netdevsim/netdevsim10/10 flavour pcipf pfnum 0 $
>>> devlink port add netdevsim/netdevsim10/11 flavour pcisf pfnum 0 sfnum
>>> 44 $ devlink port show netdevsim/netdevsim10/11
>>> netdevsim/netdevsim10/11: type eth netdev eni10npf0sf44 flavour pcisf
>> controller 0 pfnum 0 sfnum 44 external true splittable false
>>>   function:
>>>     hw_addr 00:00:00:00:00:00 state inactive
>>>
>>> $ devlink port function set netdevsim/netdevsim10/11 hw_addr
>>> 00:11:22:33:44:55 state active
>>>
>>> $ devlink port show netdevsim/netdevsim10/11 -jp {
>>>     "port": {
>>>         "netdevsim/netdevsim10/11": {
>>>             "type": "eth",
>>>             "netdev": "eni10npf0sf44",
>>
>> I could be missing something, but it does not seem like this patch creates the
>> netdevice for the subfunction.
>>
> The sf port created here is the eswitch port with a valid switch id similar to PF and physical port.
> So the netdev created is the representor netdevice.
> It is created uniformly for subfunction and pf port flavours.

To be clear: If I run the devlink commands to create a sub-function, `ip
link show` should list a net_device that corresponds to the sub-function?
Parav Pandit Sept. 18, 2020, 4:41 a.m. UTC | #4
Hi David,

> From: David Ahern <dsahern@gmail.com>
> Sent: Friday, September 18, 2020 9:08 AM
> 
> On 9/17/20 9:29 PM, Parav Pandit wrote:
> >>> Examples:
> >>>
> >>> Create a PCI PF and PCI SF port.
> >>> $ devlink port add netdevsim/netdevsim10/10 flavour pcipf pfnum 0 $
> >>> devlink port add netdevsim/netdevsim10/11 flavour pcisf pfnum 0
> >>> sfnum
> >>> 44 $ devlink port show netdevsim/netdevsim10/11
> >>> netdevsim/netdevsim10/11: type eth netdev eni10npf0sf44 flavour
> >>> pcisf
> >> controller 0 pfnum 0 sfnum 44 external true splittable false
> >>>   function:
> >>>     hw_addr 00:00:00:00:00:00 state inactive
> >>>
> >>> $ devlink port function set netdevsim/netdevsim10/11 hw_addr
> >>> 00:11:22:33:44:55 state active
> >>>
> >>> $ devlink port show netdevsim/netdevsim10/11 -jp {
> >>>     "port": {
> >>>         "netdevsim/netdevsim10/11": {
> >>>             "type": "eth",
> >>>             "netdev": "eni10npf0sf44",
> >>
> >> I could be missing something, but it does not seem like this patch
> >> creates the netdevice for the subfunction.
> >>
> > The sf port created here is the eswitch port with a valid switch id similar to PF
> and physical port.
> > So the netdev created is the representor netdevice.
> > It is created uniformly for subfunction and pf port flavours.
> 
> To be clear: If I run the devlink commands to create a sub-function, `ip link
> show` should list a net_device that corresponds to the sub-function?

In this series only representor netdevice corresponds to sub-function will be visible in ip link show, i.e. eni10npf0sf44.

Netdevsim is only simulating the eswitch side or control path at present for pf/vf/sf ports.
So other end of this port (netdevice/rdma device/vdpa device) are not yet created.

Subfunction will be anchored on virtbus described in RFC [1], which is not yet in-kernel yet.
Grep for "every SF a device is created on virtbus" to jump to this part of the long RFC.

[1] https://lore.kernel.org/netdev/20200519092258.GF4655@nanopsycho/
Samudrala, Sridhar Sept. 18, 2020, 4:53 a.m. UTC | #5
On 9/17/2020 9:41 PM, Parav Pandit wrote:
> Hi David,
> 
>> From: David Ahern <dsahern@gmail.com>
>> Sent: Friday, September 18, 2020 9:08 AM
>>
>> On 9/17/20 9:29 PM, Parav Pandit wrote:
>>>>> Examples:
>>>>>
>>>>> Create a PCI PF and PCI SF port.
>>>>> $ devlink port add netdevsim/netdevsim10/10 flavour pcipf pfnum 0 $
>>>>> devlink port add netdevsim/netdevsim10/11 flavour pcisf pfnum 0
>>>>> sfnum
>>>>> 44 $ devlink port show netdevsim/netdevsim10/11
>>>>> netdevsim/netdevsim10/11: type eth netdev eni10npf0sf44 flavour
>>>>> pcisf
>>>> controller 0 pfnum 0 sfnum 44 external true splittable false
>>>>>    function:
>>>>>      hw_addr 00:00:00:00:00:00 state inactive
>>>>>
>>>>> $ devlink port function set netdevsim/netdevsim10/11 hw_addr
>>>>> 00:11:22:33:44:55 state active
>>>>>
>>>>> $ devlink port show netdevsim/netdevsim10/11 -jp {
>>>>>      "port": {
>>>>>          "netdevsim/netdevsim10/11": {
>>>>>              "type": "eth",
>>>>>              "netdev": "eni10npf0sf44",
>>>>
>>>> I could be missing something, but it does not seem like this patch
>>>> creates the netdevice for the subfunction.
>>>>
>>> The sf port created here is the eswitch port with a valid switch id similar to PF
>> and physical port.
>>> So the netdev created is the representor netdevice.
>>> It is created uniformly for subfunction and pf port flavours.
>>
>> To be clear: If I run the devlink commands to create a sub-function, `ip link
>> show` should list a net_device that corresponds to the sub-function?
> 
> In this series only representor netdevice corresponds to sub-function will be visible in ip link show, i.e. eni10npf0sf44.

This should be OK if the eSwitch mode is changed to switchdev.
But i think it should be possible to create a subfunction even in legacy 
mode where representor netdev is not created.

> 
> Netdevsim is only simulating the eswitch side or control path at present for pf/vf/sf ports.
> So other end of this port (netdevice/rdma device/vdpa device) are not yet created.
> 
> Subfunction will be anchored on virtbus described in RFC [1], which is not yet in-kernel yet.
> Grep for "every SF a device is created on virtbus" to jump to this part of the long RFC.
> 
> [1] https://lore.kernel.org/netdev/20200519092258.GF4655@nanopsycho/
>
Parav Pandit Sept. 18, 2020, 5:10 a.m. UTC | #6
> From: Samudrala, Sridhar <sridhar.samudrala@intel.com>
> Sent: Friday, September 18, 2020 10:23 AM
> 
> 
> On 9/17/2020 9:41 PM, Parav Pandit wrote:
> > Hi David,
> >
> >> From: David Ahern <dsahern@gmail.com>
> >> Sent: Friday, September 18, 2020 9:08 AM
> >>
> >> On 9/17/20 9:29 PM, Parav Pandit wrote:
> >>>>> Examples:
> >>>>>
> >>>>> Create a PCI PF and PCI SF port.
> >>>>> $ devlink port add netdevsim/netdevsim10/10 flavour pcipf pfnum 0
> >>>>> $ devlink port add netdevsim/netdevsim10/11 flavour pcisf pfnum 0
> >>>>> sfnum
> >>>>> 44 $ devlink port show netdevsim/netdevsim10/11
> >>>>> netdevsim/netdevsim10/11: type eth netdev eni10npf0sf44 flavour
> >>>>> pcisf
> >>>> controller 0 pfnum 0 sfnum 44 external true splittable false
> >>>>>    function:
> >>>>>      hw_addr 00:00:00:00:00:00 state inactive
> >>>>>
> >>>>> $ devlink port function set netdevsim/netdevsim10/11 hw_addr
> >>>>> 00:11:22:33:44:55 state active
> >>>>>
> >>>>> $ devlink port show netdevsim/netdevsim10/11 -jp {
> >>>>>      "port": {
> >>>>>          "netdevsim/netdevsim10/11": {
> >>>>>              "type": "eth",
> >>>>>              "netdev": "eni10npf0sf44",
> >>>>
> >>>> I could be missing something, but it does not seem like this patch
> >>>> creates the netdevice for the subfunction.
> >>>>
> >>> The sf port created here is the eswitch port with a valid switch id
> >>> similar to PF
> >> and physical port.
> >>> So the netdev created is the representor netdevice.
> >>> It is created uniformly for subfunction and pf port flavours.
> >>
> >> To be clear: If I run the devlink commands to create a sub-function,
> >> `ip link show` should list a net_device that corresponds to the sub-function?
> >
> > In this series only representor netdevice corresponds to sub-function will be
> visible in ip link show, i.e. eni10npf0sf44.
> 
> This should be OK if the eSwitch mode is changed to switchdev.
> But i think it should be possible to create a subfunction even in legacy mode
> where representor netdev is not created.

switch_id is optional attribute of the devlink port.
It is applicable only when it is eswitch port in switchdev mode.

> 
> >
> > Netdevsim is only simulating the eswitch side or control path at present for
> pf/vf/sf ports.
> > So other end of this port (netdevice/rdma device/vdpa device) are not yet
> created.
> >
> > Subfunction will be anchored on virtbus described in RFC [1], which is not yet
> in-kernel yet.
> > Grep for "every SF a device is created on virtbus" to jump to this part of the
> long RFC.
> >
> > [1] https://lore.kernel.org/netdev/20200519092258.GF4655@nanopsycho/
> >
David Ahern Sept. 18, 2020, 3:23 p.m. UTC | #7
On 9/17/20 10:41 PM, Parav Pandit wrote:
> Hi David,
> 
>> From: David Ahern <dsahern@gmail.com>
>> Sent: Friday, September 18, 2020 9:08 AM
>>
>> On 9/17/20 9:29 PM, Parav Pandit wrote:
>>>>> Examples:
>>>>>
>>>>> Create a PCI PF and PCI SF port.
>>>>> $ devlink port add netdevsim/netdevsim10/10 flavour pcipf pfnum 0 $
>>>>> devlink port add netdevsim/netdevsim10/11 flavour pcisf pfnum 0
>>>>> sfnum
>>>>> 44 $ devlink port show netdevsim/netdevsim10/11
>>>>> netdevsim/netdevsim10/11: type eth netdev eni10npf0sf44 flavour
>>>>> pcisf
>>>> controller 0 pfnum 0 sfnum 44 external true splittable false
>>>>>   function:
>>>>>     hw_addr 00:00:00:00:00:00 state inactive
>>>>>
>>>>> $ devlink port function set netdevsim/netdevsim10/11 hw_addr
>>>>> 00:11:22:33:44:55 state active
>>>>>
>>>>> $ devlink port show netdevsim/netdevsim10/11 -jp {
>>>>>     "port": {
>>>>>         "netdevsim/netdevsim10/11": {
>>>>>             "type": "eth",
>>>>>             "netdev": "eni10npf0sf44",
>>>>
>>>> I could be missing something, but it does not seem like this patch
>>>> creates the netdevice for the subfunction.
>>>>
>>> The sf port created here is the eswitch port with a valid switch id similar to PF
>> and physical port.
>>> So the netdev created is the representor netdevice.
>>> It is created uniformly for subfunction and pf port flavours.
>>
>> To be clear: If I run the devlink commands to create a sub-function, `ip link
>> show` should list a net_device that corresponds to the sub-function?
> 
> In this series only representor netdevice corresponds to sub-function will be visible in ip link show, i.e. eni10npf0sf44.
> 
> Netdevsim is only simulating the eswitch side or control path at present for pf/vf/sf ports.
> So other end of this port (netdevice/rdma device/vdpa device) are not yet created.
> 
> Subfunction will be anchored on virtbus described in RFC [1], which is not yet in-kernel yet.
> Grep for "every SF a device is created on virtbus" to jump to this part of the long RFC.
> 
> [1] https://lore.kernel.org/netdev/20200519092258.GF4655@nanopsycho/
> 

Thanks for the reference. I have seen that. I am interested in this idea
of creating netdevs for 'slices' of an asic or nic, but it is not clear
to me how it connects end to end. Will you be able to create a
sub-function based netdevice, assign it limited resources from the nic
and then assign that netdevice to a container for example?
Parav Pandit Sept. 18, 2020, 3:51 p.m. UTC | #8
> From: David Ahern <dsahern@gmail.com>
> Sent: Friday, September 18, 2020 8:54 PM
> 
> On 9/17/20 10:41 PM, Parav Pandit wrote:
> > Hi David,
> >
> >> From: David Ahern <dsahern@gmail.com>
> >> Sent: Friday, September 18, 2020 9:08 AM
> >>
> >> On 9/17/20 9:29 PM, Parav Pandit wrote:
> >>>>> Examples:
> >>>>>
> >>>>> Create a PCI PF and PCI SF port.
> >>>>> $ devlink port add netdevsim/netdevsim10/10 flavour pcipf pfnum 0
> >>>>> $ devlink port add netdevsim/netdevsim10/11 flavour pcisf pfnum 0
> >>>>> sfnum
> >>>>> 44 $ devlink port show netdevsim/netdevsim10/11
> >>>>> netdevsim/netdevsim10/11: type eth netdev eni10npf0sf44 flavour
> >>>>> pcisf
> >>>> controller 0 pfnum 0 sfnum 44 external true splittable false
> >>>>>   function:
> >>>>>     hw_addr 00:00:00:00:00:00 state inactive
> >>>>>
> >>>>> $ devlink port function set netdevsim/netdevsim10/11 hw_addr
> >>>>> 00:11:22:33:44:55 state active
> >>>>>
> >>>>> $ devlink port show netdevsim/netdevsim10/11 -jp {
> >>>>>     "port": {
> >>>>>         "netdevsim/netdevsim10/11": {
> >>>>>             "type": "eth",
> >>>>>             "netdev": "eni10npf0sf44",
> >>>>
> >>>> I could be missing something, but it does not seem like this patch
> >>>> creates the netdevice for the subfunction.
> >>>>
> >>> The sf port created here is the eswitch port with a valid switch id
> >>> similar to PF
> >> and physical port.
> >>> So the netdev created is the representor netdevice.
> >>> It is created uniformly for subfunction and pf port flavours.
> >>
> >> To be clear: If I run the devlink commands to create a sub-function,
> >> `ip link show` should list a net_device that corresponds to the sub-
> function?
> >
> > In this series only representor netdevice corresponds to sub-function will
> be visible in ip link show, i.e. eni10npf0sf44.
> >
> > Netdevsim is only simulating the eswitch side or control path at present for
> pf/vf/sf ports.
> > So other end of this port (netdevice/rdma device/vdpa device) are not yet
> created.
> >
> > Subfunction will be anchored on virtbus described in RFC [1], which is not
> yet in-kernel yet.
> > Grep for "every SF a device is created on virtbus" to jump to this part of the
> long RFC.
> >
> > [1] https://lore.kernel.org/netdev/20200519092258.GF4655@nanopsycho/
> >
> 
> Thanks for the reference. I have seen that. I am interested in this idea of
> creating netdevs for 'slices' of an asic or nic, but it is not clear to me how it
> connects end to end. Will you be able to create a sub-function based
> netdevice, assign it limited resources from the nic and then assign that
> netdevice to a container for example?
Yep. You described is precisely well.
This short series creates eswitch representor for the moment.
Once virtbus is in_kernel, will extend to create actual netdevice of the subfunction too.
diff mbox series

Patch

diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 0ea9705eda38..c70782e444d5 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -222,6 +222,7 @@  struct nsim_dev {
 		struct list_head head;
 		struct ida ida;
 		struct ida pfnum_ida;
+		struct ida sfnum_ida;
 	} port_functions;
 };
 
diff --git a/drivers/net/netdevsim/port_function.c b/drivers/net/netdevsim/port_function.c
index 99581d3d15fe..e1812acd55b4 100644
--- a/drivers/net/netdevsim/port_function.c
+++ b/drivers/net/netdevsim/port_function.c
@@ -13,10 +13,12 @@  struct nsim_port_function {
 	unsigned int port_index;
 	enum devlink_port_flavour flavour;
 	u32 controller;
+	u32 sfnum;
 	u16 pfnum;
 	struct nsim_port_function *pf_port; /* Valid only for SF port */
 	u8 hw_addr[ETH_ALEN];
 	u8 state; /* enum devlink_port_function_state */
+	int refcount; /* Counts how many sf ports are bound attached to this pf port. */
 };
 
 void nsim_dev_port_function_init(struct nsim_dev *nsim_dev)
@@ -25,10 +27,13 @@  void nsim_dev_port_function_init(struct nsim_dev *nsim_dev)
 	INIT_LIST_HEAD(&nsim_dev->port_functions.head);
 	ida_init(&nsim_dev->port_functions.ida);
 	ida_init(&nsim_dev->port_functions.pfnum_ida);
+	ida_init(&nsim_dev->port_functions.sfnum_ida);
 }
 
 void nsim_dev_port_function_exit(struct nsim_dev *nsim_dev)
 {
+	WARN_ON(!ida_is_empty(&nsim_dev->port_functions.sfnum_ida));
+	ida_destroy(&nsim_dev->port_functions.sfnum_ida);
 	WARN_ON(!ida_is_empty(&nsim_dev->port_functions.pfnum_ida));
 	ida_destroy(&nsim_dev->port_functions.pfnum_ida);
 	WARN_ON(!ida_is_empty(&nsim_dev->port_functions.ida));
@@ -119,9 +124,24 @@  nsim_devlink_port_function_alloc(struct nsim_dev *dev, const struct devlink_port
 			goto fn_ida_err;
 		port->pfnum = ret;
 		break;
+	case DEVLINK_PORT_FLAVOUR_PCI_SF:
+		if (attrs->sfnum_valid)
+			ret = ida_alloc_range(&dev->port_functions.sfnum_ida, attrs->sfnum,
+					      attrs->sfnum, GFP_KERNEL);
+		else
+			ret = ida_alloc(&dev->port_functions.sfnum_ida, GFP_KERNEL);
+		if (ret < 0)
+			goto fn_ida_err;
+		port->sfnum = ret;
+		port->pfnum = attrs->pfnum;
+		break;
 	default:
 		break;
 	}
+	/* refcount_t is not needed as port is protected by port_functions.mutex.
+	 * This count is to keep track of how many SF ports are attached a PF port.
+	 */
+	port->refcount = 1;
 	return port;
 
 fn_ida_err:
@@ -137,6 +157,9 @@  static void nsim_devlink_port_function_free(struct nsim_dev *dev, struct nsim_po
 	case DEVLINK_PORT_FLAVOUR_PCI_PF:
 		ida_simple_remove(&dev->port_functions.pfnum_ida, port->pfnum);
 		break;
+	case DEVLINK_PORT_FLAVOUR_PCI_SF:
+		ida_simple_remove(&dev->port_functions.sfnum_ida, port->sfnum);
+		break;
 	default:
 		break;
 	}
@@ -170,6 +193,11 @@  nsim_dev_port_port_exists(struct nsim_dev *nsim_dev, const struct devlink_port_n
 		if (attrs->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF &&
 		    tmp->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF && tmp->pfnum == attrs->pfnum)
 			return true;
+
+		if (attrs->flavour == DEVLINK_PORT_FLAVOUR_PCI_SF &&
+		    tmp->flavour == DEVLINK_PORT_FLAVOUR_PCI_SF &&
+		    tmp->sfnum == attrs->sfnum && tmp->pfnum == attrs->pfnum)
+			return true;
 	}
 	return false;
 }
@@ -183,21 +211,71 @@  nsim_dev_devlink_port_index_lookup(const struct nsim_dev *nsim_dev, unsigned int
 	list_for_each_entry(port, &nsim_dev->port_functions.head, list) {
 		if (port->port_index != port_index)
 			continue;
+		if (port->refcount > 1) {
+			NL_SET_ERR_MSG_MOD(extack, "Port is in use");
+			return ERR_PTR(-EBUSY);
+		}
 		return port;
 	}
 	NL_SET_ERR_MSG_MOD(extack, "User created port not found");
 	return ERR_PTR(-ENOENT);
 }
 
+static struct nsim_port_function *
+pf_port_get(struct nsim_dev *nsim_dev, struct nsim_port_function *port)
+{
+	struct nsim_port_function *tmp;
+
+	/* PF port addition doesn't need a parent. */
+	if (port->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF)
+		return NULL;
+
+	list_for_each_entry(tmp, &nsim_dev->port_functions.head, list) {
+		if (tmp->flavour != DEVLINK_PORT_FLAVOUR_PCI_PF || tmp->pfnum != port->pfnum)
+			continue;
+
+		if (tmp->refcount + 1 == INT_MAX)
+			return ERR_PTR(-ENOSPC);
+
+		port->pf_port = tmp;
+		tmp->refcount++;
+		return tmp;
+	}
+	return ERR_PTR(-ENOENT);
+}
+
+static void pf_port_put(struct nsim_port_function *port)
+{
+	if (port->pf_port) {
+		port->pf_port->refcount--;
+		WARN_ON(port->pf_port->refcount < 0);
+	}
+	port->refcount--;
+	WARN_ON(port->refcount != 0);
+}
+
 static int nsim_devlink_port_function_add(struct devlink *devlink, struct nsim_dev *nsim_dev,
 					  struct nsim_port_function *port,
 					  struct netlink_ext_ack *extack)
 {
+	struct nsim_port_function *pf_port;
 	int err;
 
-	list_add(&port->list, &nsim_dev->port_functions.head);
+	/* Keep all PF ports at the start, so that when driver is unloaded
+	 * All SF ports from the end of the list can be removed first.
+	 */
+	if (port->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF)
+		list_add(&port->list, &nsim_dev->port_functions.head);
+	else
+		list_add_tail(&port->list, &nsim_dev->port_functions.head);
+
+	pf_port = pf_port_get(nsim_dev, port);
+	if (IS_ERR(pf_port)) {
+		NL_SET_ERR_MSG_MOD(extack, "Fail to get pf port");
+		err = PTR_ERR(pf_port);
+		goto pf_err;
+	}
 
-	port->state = DEVLINK_PORT_FUNCTION_STATE_INACTIVE;
 	err = devlink_port_register(devlink, &port->dl_port, port->port_index);
 	if (err)
 		goto reg_err;
@@ -213,6 +291,8 @@  static int nsim_devlink_port_function_add(struct devlink *devlink, struct nsim_d
 	devlink_port_type_clear(&port->dl_port);
 	devlink_port_unregister(&port->dl_port);
 reg_err:
+	pf_port_put(port);
+pf_err:
 	list_del(&port->list);
 	return err;
 }
@@ -224,12 +304,14 @@  static void nsim_devlink_port_function_del(struct nsim_dev *nsim_dev,
 	unregister_netdev(port->netdev);
 	devlink_port_unregister(&port->dl_port);
 	list_del(&port->list);
+	pf_port_put(port);
 }
 
 static bool nsim_dev_port_flavour_supported(const struct nsim_dev *nsim_dev,
 					    const struct devlink_port_new_attrs *attrs)
 {
-	return attrs->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF;
+	return attrs->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF ||
+	       attrs->flavour == DEVLINK_PORT_FLAVOUR_PCI_SF;
 }
 
 int nsim_dev_devlink_port_new(struct devlink *devlink, const struct devlink_port_new_attrs *attrs,
@@ -266,7 +348,11 @@  int nsim_dev_devlink_port_new(struct devlink *devlink, const struct devlink_port
 	       nsim_dev->switch_id.id_len);
 	port->dl_port.attrs.switch_id.id_len = nsim_dev->switch_id.id_len;
 
-	devlink_port_attrs_pci_pf_set(&port->dl_port, port->controller, port->pfnum, false);
+	if (attrs->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF)
+		devlink_port_attrs_pci_pf_set(&port->dl_port, port->controller, port->pfnum, false);
+	else
+		devlink_port_attrs_pci_sf_set(&port->dl_port, port->controller, port->pfnum,
+					      port->sfnum, false);
 
 	err = nsim_devlink_port_function_add(devlink, nsim_dev, port, extack);
 	if (err)
@@ -333,6 +419,7 @@  void nsim_dev_port_function_disable(struct nsim_dev *nsim_dev)
 	 * ports.
 	 */
 
+	/* Remove SF ports first, followed by PF ports. */
 	list_for_each_entry_safe_reverse(port, tmp, &nsim_dev->port_functions.head, list) {
 		nsim_devlink_port_function_del(nsim_dev, port);
 		nsim_devlink_port_function_free(nsim_dev, port);