Message ID | 20200917172020.26484-9-parav@nvidia.com |
---|---|
State | Changes Requested |
Delegated to: | David Miller |
Headers | show |
Series | devlink: Add SF add/delete devlink ops | expand |
On 9/17/20 11:20 AM, Parav Pandit wrote: > Simulate PCI SF ports. Allow user to create one or more PCI SF ports. > > Examples: > > Create a PCI PF and PCI SF port. > $ devlink port add netdevsim/netdevsim10/10 flavour pcipf pfnum 0 > $ devlink port add netdevsim/netdevsim10/11 flavour pcisf pfnum 0 sfnum 44 > $ devlink port show netdevsim/netdevsim10/11 > netdevsim/netdevsim10/11: type eth netdev eni10npf0sf44 flavour pcisf controller 0 pfnum 0 sfnum 44 external true splittable false > function: > hw_addr 00:00:00:00:00:00 state inactive > > $ devlink port function set netdevsim/netdevsim10/11 hw_addr 00:11:22:33:44:55 state active > > $ devlink port show netdevsim/netdevsim10/11 -jp > { > "port": { > "netdevsim/netdevsim10/11": { > "type": "eth", > "netdev": "eni10npf0sf44", I could be missing something, but it does not seem like this patch creates the netdevice for the subfunction. > "flavour": "pcisf", > "controller": 0, > "pfnum": 0, > "sfnum": 44, > "external": true, > "splittable": false, > "function": { > "hw_addr": "00:11:22:33:44:55", > "state": "active" > } > } > } > } > > Delete newly added devlink port > $ devlink port add netdevsim/netdevsim10/11 > > Add devlink port of flavour 'pcisf' where port index and sfnum are > auto assigned by driver. > $ devlink port add netdevsim/netdevsim10 flavour pcisf controller 0 pfnum 0 > > Signed-off-by: Parav Pandit <parav@nvidia.com> > Reviewed-by: Jiri Pirko <jiri@nvidia.com> > --- > drivers/net/netdevsim/netdevsim.h | 1 + > drivers/net/netdevsim/port_function.c | 95 +++++++++++++++++++++++++-- > 2 files changed, 92 insertions(+), 4 deletions(-) > > diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h > index 0ea9705eda38..c70782e444d5 100644 > --- a/drivers/net/netdevsim/netdevsim.h > +++ b/drivers/net/netdevsim/netdevsim.h > @@ -222,6 +222,7 @@ struct nsim_dev { > struct list_head head; > struct ida ida; > struct ida pfnum_ida; > + struct ida sfnum_ida; > } port_functions; > }; > > diff --git a/drivers/net/netdevsim/port_function.c b/drivers/net/netdevsim/port_function.c > index 99581d3d15fe..e1812acd55b4 100644 > --- a/drivers/net/netdevsim/port_function.c > +++ b/drivers/net/netdevsim/port_function.c > @@ -13,10 +13,12 @@ struct nsim_port_function { > unsigned int port_index; > enum devlink_port_flavour flavour; > u32 controller; > + u32 sfnum; > u16 pfnum; > struct nsim_port_function *pf_port; /* Valid only for SF port */ > u8 hw_addr[ETH_ALEN]; > u8 state; /* enum devlink_port_function_state */ > + int refcount; /* Counts how many sf ports are bound attached to this pf port. */ > }; > > void nsim_dev_port_function_init(struct nsim_dev *nsim_dev) > @@ -25,10 +27,13 @@ void nsim_dev_port_function_init(struct nsim_dev *nsim_dev) > INIT_LIST_HEAD(&nsim_dev->port_functions.head); > ida_init(&nsim_dev->port_functions.ida); > ida_init(&nsim_dev->port_functions.pfnum_ida); > + ida_init(&nsim_dev->port_functions.sfnum_ida); > } > > void nsim_dev_port_function_exit(struct nsim_dev *nsim_dev) > { > + WARN_ON(!ida_is_empty(&nsim_dev->port_functions.sfnum_ida)); > + ida_destroy(&nsim_dev->port_functions.sfnum_ida); > WARN_ON(!ida_is_empty(&nsim_dev->port_functions.pfnum_ida)); > ida_destroy(&nsim_dev->port_functions.pfnum_ida); > WARN_ON(!ida_is_empty(&nsim_dev->port_functions.ida)); > @@ -119,9 +124,24 @@ nsim_devlink_port_function_alloc(struct nsim_dev *dev, const struct devlink_port > goto fn_ida_err; > port->pfnum = ret; > break; > + case DEVLINK_PORT_FLAVOUR_PCI_SF: > + if (attrs->sfnum_valid) > + ret = ida_alloc_range(&dev->port_functions.sfnum_ida, attrs->sfnum, > + attrs->sfnum, GFP_KERNEL); > + else > + ret = ida_alloc(&dev->port_functions.sfnum_ida, GFP_KERNEL); > + if (ret < 0) > + goto fn_ida_err; > + port->sfnum = ret; > + port->pfnum = attrs->pfnum; > + break; > default: > break; > } > + /* refcount_t is not needed as port is protected by port_functions.mutex. > + * This count is to keep track of how many SF ports are attached a PF port. > + */ > + port->refcount = 1; > return port; > > fn_ida_err: > @@ -137,6 +157,9 @@ static void nsim_devlink_port_function_free(struct nsim_dev *dev, struct nsim_po > case DEVLINK_PORT_FLAVOUR_PCI_PF: > ida_simple_remove(&dev->port_functions.pfnum_ida, port->pfnum); > break; > + case DEVLINK_PORT_FLAVOUR_PCI_SF: > + ida_simple_remove(&dev->port_functions.sfnum_ida, port->sfnum); > + break; > default: > break; > } > @@ -170,6 +193,11 @@ nsim_dev_port_port_exists(struct nsim_dev *nsim_dev, const struct devlink_port_n > if (attrs->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF && > tmp->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF && tmp->pfnum == attrs->pfnum) > return true; > + > + if (attrs->flavour == DEVLINK_PORT_FLAVOUR_PCI_SF && > + tmp->flavour == DEVLINK_PORT_FLAVOUR_PCI_SF && > + tmp->sfnum == attrs->sfnum && tmp->pfnum == attrs->pfnum) > + return true; > } > return false; > } > @@ -183,21 +211,71 @@ nsim_dev_devlink_port_index_lookup(const struct nsim_dev *nsim_dev, unsigned int > list_for_each_entry(port, &nsim_dev->port_functions.head, list) { > if (port->port_index != port_index) > continue; > + if (port->refcount > 1) { > + NL_SET_ERR_MSG_MOD(extack, "Port is in use"); > + return ERR_PTR(-EBUSY); > + } > return port; > } > NL_SET_ERR_MSG_MOD(extack, "User created port not found"); > return ERR_PTR(-ENOENT); > } > > +static struct nsim_port_function * > +pf_port_get(struct nsim_dev *nsim_dev, struct nsim_port_function *port) > +{ > + struct nsim_port_function *tmp; > + > + /* PF port addition doesn't need a parent. */ > + if (port->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF) > + return NULL; > + > + list_for_each_entry(tmp, &nsim_dev->port_functions.head, list) { > + if (tmp->flavour != DEVLINK_PORT_FLAVOUR_PCI_PF || tmp->pfnum != port->pfnum) > + continue; > + > + if (tmp->refcount + 1 == INT_MAX) > + return ERR_PTR(-ENOSPC); > + > + port->pf_port = tmp; > + tmp->refcount++; > + return tmp; > + } > + return ERR_PTR(-ENOENT); > +} > + > +static void pf_port_put(struct nsim_port_function *port) > +{ > + if (port->pf_port) { > + port->pf_port->refcount--; > + WARN_ON(port->pf_port->refcount < 0); > + } > + port->refcount--; > + WARN_ON(port->refcount != 0); > +} > + > static int nsim_devlink_port_function_add(struct devlink *devlink, struct nsim_dev *nsim_dev, > struct nsim_port_function *port, > struct netlink_ext_ack *extack) > { > + struct nsim_port_function *pf_port; > int err; > > - list_add(&port->list, &nsim_dev->port_functions.head); > + /* Keep all PF ports at the start, so that when driver is unloaded > + * All SF ports from the end of the list can be removed first. > + */ > + if (port->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF) > + list_add(&port->list, &nsim_dev->port_functions.head); > + else > + list_add_tail(&port->list, &nsim_dev->port_functions.head); > + > + pf_port = pf_port_get(nsim_dev, port); > + if (IS_ERR(pf_port)) { > + NL_SET_ERR_MSG_MOD(extack, "Fail to get pf port"); > + err = PTR_ERR(pf_port); > + goto pf_err; > + } > > - port->state = DEVLINK_PORT_FUNCTION_STATE_INACTIVE; > err = devlink_port_register(devlink, &port->dl_port, port->port_index); > if (err) > goto reg_err; > @@ -213,6 +291,8 @@ static int nsim_devlink_port_function_add(struct devlink *devlink, struct nsim_d > devlink_port_type_clear(&port->dl_port); > devlink_port_unregister(&port->dl_port); > reg_err: > + pf_port_put(port); > +pf_err: > list_del(&port->list); > return err; > } > @@ -224,12 +304,14 @@ static void nsim_devlink_port_function_del(struct nsim_dev *nsim_dev, > unregister_netdev(port->netdev); > devlink_port_unregister(&port->dl_port); > list_del(&port->list); > + pf_port_put(port); > } > > static bool nsim_dev_port_flavour_supported(const struct nsim_dev *nsim_dev, > const struct devlink_port_new_attrs *attrs) > { > - return attrs->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF; > + return attrs->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF || > + attrs->flavour == DEVLINK_PORT_FLAVOUR_PCI_SF; > } > > int nsim_dev_devlink_port_new(struct devlink *devlink, const struct devlink_port_new_attrs *attrs, > @@ -266,7 +348,11 @@ int nsim_dev_devlink_port_new(struct devlink *devlink, const struct devlink_port > nsim_dev->switch_id.id_len); > port->dl_port.attrs.switch_id.id_len = nsim_dev->switch_id.id_len; > > - devlink_port_attrs_pci_pf_set(&port->dl_port, port->controller, port->pfnum, false); > + if (attrs->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF) > + devlink_port_attrs_pci_pf_set(&port->dl_port, port->controller, port->pfnum, false); > + else > + devlink_port_attrs_pci_sf_set(&port->dl_port, port->controller, port->pfnum, > + port->sfnum, false); > > err = nsim_devlink_port_function_add(devlink, nsim_dev, port, extack); > if (err) > @@ -333,6 +419,7 @@ void nsim_dev_port_function_disable(struct nsim_dev *nsim_dev) > * ports. > */ > > + /* Remove SF ports first, followed by PF ports. */ > list_for_each_entry_safe_reverse(port, tmp, &nsim_dev->port_functions.head, list) { > nsim_devlink_port_function_del(nsim_dev, port); > nsim_devlink_port_function_free(nsim_dev, port); >
> From: David Ahern <dsahern@gmail.com> > Sent: Friday, September 18, 2020 2:01 AM > > On 9/17/20 11:20 AM, Parav Pandit wrote: > > Simulate PCI SF ports. Allow user to create one or more PCI SF ports. > > > > Examples: > > > > Create a PCI PF and PCI SF port. > > $ devlink port add netdevsim/netdevsim10/10 flavour pcipf pfnum 0 $ > > devlink port add netdevsim/netdevsim10/11 flavour pcisf pfnum 0 sfnum > > 44 $ devlink port show netdevsim/netdevsim10/11 > > netdevsim/netdevsim10/11: type eth netdev eni10npf0sf44 flavour pcisf > controller 0 pfnum 0 sfnum 44 external true splittable false > > function: > > hw_addr 00:00:00:00:00:00 state inactive > > > > $ devlink port function set netdevsim/netdevsim10/11 hw_addr > > 00:11:22:33:44:55 state active > > > > $ devlink port show netdevsim/netdevsim10/11 -jp { > > "port": { > > "netdevsim/netdevsim10/11": { > > "type": "eth", > > "netdev": "eni10npf0sf44", > > I could be missing something, but it does not seem like this patch creates the > netdevice for the subfunction. > The sf port created here is the eswitch port with a valid switch id similar to PF and physical port. So the netdev created is the representor netdevice. It is created uniformly for subfunction and pf port flavours. This series enables user to add PCI PF and subfunction ports. PF port addition (and its representor netdev) is done in patch 5 [1]. This patch for SF utilizes the same ' struct nsim_port_function' for PF and SF. Only difference between them is flavour. [1] https://lore.kernel.org/netdev/20200917172020.26484-6-parav@nvidia.com/ > > > "flavour": "pcisf", > > "controller": 0, > > "pfnum": 0, > > "sfnum": 44, > > "external": true, > > "splittable": false, > > "function": { > > "hw_addr": "00:11:22:33:44:55", > > "state": "active" > > } > > } > > } > > } > > > > Delete newly added devlink port > > $ devlink port add netdevsim/netdevsim10/11 > > > > Add devlink port of flavour 'pcisf' where port index and sfnum are > > auto assigned by driver. > > $ devlink port add netdevsim/netdevsim10 flavour pcisf controller 0 > > pfnum 0
On 9/17/20 9:29 PM, Parav Pandit wrote: >>> Examples: >>> >>> Create a PCI PF and PCI SF port. >>> $ devlink port add netdevsim/netdevsim10/10 flavour pcipf pfnum 0 $ >>> devlink port add netdevsim/netdevsim10/11 flavour pcisf pfnum 0 sfnum >>> 44 $ devlink port show netdevsim/netdevsim10/11 >>> netdevsim/netdevsim10/11: type eth netdev eni10npf0sf44 flavour pcisf >> controller 0 pfnum 0 sfnum 44 external true splittable false >>> function: >>> hw_addr 00:00:00:00:00:00 state inactive >>> >>> $ devlink port function set netdevsim/netdevsim10/11 hw_addr >>> 00:11:22:33:44:55 state active >>> >>> $ devlink port show netdevsim/netdevsim10/11 -jp { >>> "port": { >>> "netdevsim/netdevsim10/11": { >>> "type": "eth", >>> "netdev": "eni10npf0sf44", >> >> I could be missing something, but it does not seem like this patch creates the >> netdevice for the subfunction. >> > The sf port created here is the eswitch port with a valid switch id similar to PF and physical port. > So the netdev created is the representor netdevice. > It is created uniformly for subfunction and pf port flavours. To be clear: If I run the devlink commands to create a sub-function, `ip link show` should list a net_device that corresponds to the sub-function?
Hi David, > From: David Ahern <dsahern@gmail.com> > Sent: Friday, September 18, 2020 9:08 AM > > On 9/17/20 9:29 PM, Parav Pandit wrote: > >>> Examples: > >>> > >>> Create a PCI PF and PCI SF port. > >>> $ devlink port add netdevsim/netdevsim10/10 flavour pcipf pfnum 0 $ > >>> devlink port add netdevsim/netdevsim10/11 flavour pcisf pfnum 0 > >>> sfnum > >>> 44 $ devlink port show netdevsim/netdevsim10/11 > >>> netdevsim/netdevsim10/11: type eth netdev eni10npf0sf44 flavour > >>> pcisf > >> controller 0 pfnum 0 sfnum 44 external true splittable false > >>> function: > >>> hw_addr 00:00:00:00:00:00 state inactive > >>> > >>> $ devlink port function set netdevsim/netdevsim10/11 hw_addr > >>> 00:11:22:33:44:55 state active > >>> > >>> $ devlink port show netdevsim/netdevsim10/11 -jp { > >>> "port": { > >>> "netdevsim/netdevsim10/11": { > >>> "type": "eth", > >>> "netdev": "eni10npf0sf44", > >> > >> I could be missing something, but it does not seem like this patch > >> creates the netdevice for the subfunction. > >> > > The sf port created here is the eswitch port with a valid switch id similar to PF > and physical port. > > So the netdev created is the representor netdevice. > > It is created uniformly for subfunction and pf port flavours. > > To be clear: If I run the devlink commands to create a sub-function, `ip link > show` should list a net_device that corresponds to the sub-function? In this series only representor netdevice corresponds to sub-function will be visible in ip link show, i.e. eni10npf0sf44. Netdevsim is only simulating the eswitch side or control path at present for pf/vf/sf ports. So other end of this port (netdevice/rdma device/vdpa device) are not yet created. Subfunction will be anchored on virtbus described in RFC [1], which is not yet in-kernel yet. Grep for "every SF a device is created on virtbus" to jump to this part of the long RFC. [1] https://lore.kernel.org/netdev/20200519092258.GF4655@nanopsycho/
On 9/17/2020 9:41 PM, Parav Pandit wrote: > Hi David, > >> From: David Ahern <dsahern@gmail.com> >> Sent: Friday, September 18, 2020 9:08 AM >> >> On 9/17/20 9:29 PM, Parav Pandit wrote: >>>>> Examples: >>>>> >>>>> Create a PCI PF and PCI SF port. >>>>> $ devlink port add netdevsim/netdevsim10/10 flavour pcipf pfnum 0 $ >>>>> devlink port add netdevsim/netdevsim10/11 flavour pcisf pfnum 0 >>>>> sfnum >>>>> 44 $ devlink port show netdevsim/netdevsim10/11 >>>>> netdevsim/netdevsim10/11: type eth netdev eni10npf0sf44 flavour >>>>> pcisf >>>> controller 0 pfnum 0 sfnum 44 external true splittable false >>>>> function: >>>>> hw_addr 00:00:00:00:00:00 state inactive >>>>> >>>>> $ devlink port function set netdevsim/netdevsim10/11 hw_addr >>>>> 00:11:22:33:44:55 state active >>>>> >>>>> $ devlink port show netdevsim/netdevsim10/11 -jp { >>>>> "port": { >>>>> "netdevsim/netdevsim10/11": { >>>>> "type": "eth", >>>>> "netdev": "eni10npf0sf44", >>>> >>>> I could be missing something, but it does not seem like this patch >>>> creates the netdevice for the subfunction. >>>> >>> The sf port created here is the eswitch port with a valid switch id similar to PF >> and physical port. >>> So the netdev created is the representor netdevice. >>> It is created uniformly for subfunction and pf port flavours. >> >> To be clear: If I run the devlink commands to create a sub-function, `ip link >> show` should list a net_device that corresponds to the sub-function? > > In this series only representor netdevice corresponds to sub-function will be visible in ip link show, i.e. eni10npf0sf44. This should be OK if the eSwitch mode is changed to switchdev. But i think it should be possible to create a subfunction even in legacy mode where representor netdev is not created. > > Netdevsim is only simulating the eswitch side or control path at present for pf/vf/sf ports. > So other end of this port (netdevice/rdma device/vdpa device) are not yet created. > > Subfunction will be anchored on virtbus described in RFC [1], which is not yet in-kernel yet. > Grep for "every SF a device is created on virtbus" to jump to this part of the long RFC. > > [1] https://lore.kernel.org/netdev/20200519092258.GF4655@nanopsycho/ >
> From: Samudrala, Sridhar <sridhar.samudrala@intel.com> > Sent: Friday, September 18, 2020 10:23 AM > > > On 9/17/2020 9:41 PM, Parav Pandit wrote: > > Hi David, > > > >> From: David Ahern <dsahern@gmail.com> > >> Sent: Friday, September 18, 2020 9:08 AM > >> > >> On 9/17/20 9:29 PM, Parav Pandit wrote: > >>>>> Examples: > >>>>> > >>>>> Create a PCI PF and PCI SF port. > >>>>> $ devlink port add netdevsim/netdevsim10/10 flavour pcipf pfnum 0 > >>>>> $ devlink port add netdevsim/netdevsim10/11 flavour pcisf pfnum 0 > >>>>> sfnum > >>>>> 44 $ devlink port show netdevsim/netdevsim10/11 > >>>>> netdevsim/netdevsim10/11: type eth netdev eni10npf0sf44 flavour > >>>>> pcisf > >>>> controller 0 pfnum 0 sfnum 44 external true splittable false > >>>>> function: > >>>>> hw_addr 00:00:00:00:00:00 state inactive > >>>>> > >>>>> $ devlink port function set netdevsim/netdevsim10/11 hw_addr > >>>>> 00:11:22:33:44:55 state active > >>>>> > >>>>> $ devlink port show netdevsim/netdevsim10/11 -jp { > >>>>> "port": { > >>>>> "netdevsim/netdevsim10/11": { > >>>>> "type": "eth", > >>>>> "netdev": "eni10npf0sf44", > >>>> > >>>> I could be missing something, but it does not seem like this patch > >>>> creates the netdevice for the subfunction. > >>>> > >>> The sf port created here is the eswitch port with a valid switch id > >>> similar to PF > >> and physical port. > >>> So the netdev created is the representor netdevice. > >>> It is created uniformly for subfunction and pf port flavours. > >> > >> To be clear: If I run the devlink commands to create a sub-function, > >> `ip link show` should list a net_device that corresponds to the sub-function? > > > > In this series only representor netdevice corresponds to sub-function will be > visible in ip link show, i.e. eni10npf0sf44. > > This should be OK if the eSwitch mode is changed to switchdev. > But i think it should be possible to create a subfunction even in legacy mode > where representor netdev is not created. switch_id is optional attribute of the devlink port. It is applicable only when it is eswitch port in switchdev mode. > > > > > Netdevsim is only simulating the eswitch side or control path at present for > pf/vf/sf ports. > > So other end of this port (netdevice/rdma device/vdpa device) are not yet > created. > > > > Subfunction will be anchored on virtbus described in RFC [1], which is not yet > in-kernel yet. > > Grep for "every SF a device is created on virtbus" to jump to this part of the > long RFC. > > > > [1] https://lore.kernel.org/netdev/20200519092258.GF4655@nanopsycho/ > >
On 9/17/20 10:41 PM, Parav Pandit wrote: > Hi David, > >> From: David Ahern <dsahern@gmail.com> >> Sent: Friday, September 18, 2020 9:08 AM >> >> On 9/17/20 9:29 PM, Parav Pandit wrote: >>>>> Examples: >>>>> >>>>> Create a PCI PF and PCI SF port. >>>>> $ devlink port add netdevsim/netdevsim10/10 flavour pcipf pfnum 0 $ >>>>> devlink port add netdevsim/netdevsim10/11 flavour pcisf pfnum 0 >>>>> sfnum >>>>> 44 $ devlink port show netdevsim/netdevsim10/11 >>>>> netdevsim/netdevsim10/11: type eth netdev eni10npf0sf44 flavour >>>>> pcisf >>>> controller 0 pfnum 0 sfnum 44 external true splittable false >>>>> function: >>>>> hw_addr 00:00:00:00:00:00 state inactive >>>>> >>>>> $ devlink port function set netdevsim/netdevsim10/11 hw_addr >>>>> 00:11:22:33:44:55 state active >>>>> >>>>> $ devlink port show netdevsim/netdevsim10/11 -jp { >>>>> "port": { >>>>> "netdevsim/netdevsim10/11": { >>>>> "type": "eth", >>>>> "netdev": "eni10npf0sf44", >>>> >>>> I could be missing something, but it does not seem like this patch >>>> creates the netdevice for the subfunction. >>>> >>> The sf port created here is the eswitch port with a valid switch id similar to PF >> and physical port. >>> So the netdev created is the representor netdevice. >>> It is created uniformly for subfunction and pf port flavours. >> >> To be clear: If I run the devlink commands to create a sub-function, `ip link >> show` should list a net_device that corresponds to the sub-function? > > In this series only representor netdevice corresponds to sub-function will be visible in ip link show, i.e. eni10npf0sf44. > > Netdevsim is only simulating the eswitch side or control path at present for pf/vf/sf ports. > So other end of this port (netdevice/rdma device/vdpa device) are not yet created. > > Subfunction will be anchored on virtbus described in RFC [1], which is not yet in-kernel yet. > Grep for "every SF a device is created on virtbus" to jump to this part of the long RFC. > > [1] https://lore.kernel.org/netdev/20200519092258.GF4655@nanopsycho/ > Thanks for the reference. I have seen that. I am interested in this idea of creating netdevs for 'slices' of an asic or nic, but it is not clear to me how it connects end to end. Will you be able to create a sub-function based netdevice, assign it limited resources from the nic and then assign that netdevice to a container for example?
> From: David Ahern <dsahern@gmail.com> > Sent: Friday, September 18, 2020 8:54 PM > > On 9/17/20 10:41 PM, Parav Pandit wrote: > > Hi David, > > > >> From: David Ahern <dsahern@gmail.com> > >> Sent: Friday, September 18, 2020 9:08 AM > >> > >> On 9/17/20 9:29 PM, Parav Pandit wrote: > >>>>> Examples: > >>>>> > >>>>> Create a PCI PF and PCI SF port. > >>>>> $ devlink port add netdevsim/netdevsim10/10 flavour pcipf pfnum 0 > >>>>> $ devlink port add netdevsim/netdevsim10/11 flavour pcisf pfnum 0 > >>>>> sfnum > >>>>> 44 $ devlink port show netdevsim/netdevsim10/11 > >>>>> netdevsim/netdevsim10/11: type eth netdev eni10npf0sf44 flavour > >>>>> pcisf > >>>> controller 0 pfnum 0 sfnum 44 external true splittable false > >>>>> function: > >>>>> hw_addr 00:00:00:00:00:00 state inactive > >>>>> > >>>>> $ devlink port function set netdevsim/netdevsim10/11 hw_addr > >>>>> 00:11:22:33:44:55 state active > >>>>> > >>>>> $ devlink port show netdevsim/netdevsim10/11 -jp { > >>>>> "port": { > >>>>> "netdevsim/netdevsim10/11": { > >>>>> "type": "eth", > >>>>> "netdev": "eni10npf0sf44", > >>>> > >>>> I could be missing something, but it does not seem like this patch > >>>> creates the netdevice for the subfunction. > >>>> > >>> The sf port created here is the eswitch port with a valid switch id > >>> similar to PF > >> and physical port. > >>> So the netdev created is the representor netdevice. > >>> It is created uniformly for subfunction and pf port flavours. > >> > >> To be clear: If I run the devlink commands to create a sub-function, > >> `ip link show` should list a net_device that corresponds to the sub- > function? > > > > In this series only representor netdevice corresponds to sub-function will > be visible in ip link show, i.e. eni10npf0sf44. > > > > Netdevsim is only simulating the eswitch side or control path at present for > pf/vf/sf ports. > > So other end of this port (netdevice/rdma device/vdpa device) are not yet > created. > > > > Subfunction will be anchored on virtbus described in RFC [1], which is not > yet in-kernel yet. > > Grep for "every SF a device is created on virtbus" to jump to this part of the > long RFC. > > > > [1] https://lore.kernel.org/netdev/20200519092258.GF4655@nanopsycho/ > > > > Thanks for the reference. I have seen that. I am interested in this idea of > creating netdevs for 'slices' of an asic or nic, but it is not clear to me how it > connects end to end. Will you be able to create a sub-function based > netdevice, assign it limited resources from the nic and then assign that > netdevice to a container for example? Yep. You described is precisely well. This short series creates eswitch representor for the moment. Once virtbus is in_kernel, will extend to create actual netdevice of the subfunction too.
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 0ea9705eda38..c70782e444d5 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -222,6 +222,7 @@ struct nsim_dev { struct list_head head; struct ida ida; struct ida pfnum_ida; + struct ida sfnum_ida; } port_functions; }; diff --git a/drivers/net/netdevsim/port_function.c b/drivers/net/netdevsim/port_function.c index 99581d3d15fe..e1812acd55b4 100644 --- a/drivers/net/netdevsim/port_function.c +++ b/drivers/net/netdevsim/port_function.c @@ -13,10 +13,12 @@ struct nsim_port_function { unsigned int port_index; enum devlink_port_flavour flavour; u32 controller; + u32 sfnum; u16 pfnum; struct nsim_port_function *pf_port; /* Valid only for SF port */ u8 hw_addr[ETH_ALEN]; u8 state; /* enum devlink_port_function_state */ + int refcount; /* Counts how many sf ports are bound attached to this pf port. */ }; void nsim_dev_port_function_init(struct nsim_dev *nsim_dev) @@ -25,10 +27,13 @@ void nsim_dev_port_function_init(struct nsim_dev *nsim_dev) INIT_LIST_HEAD(&nsim_dev->port_functions.head); ida_init(&nsim_dev->port_functions.ida); ida_init(&nsim_dev->port_functions.pfnum_ida); + ida_init(&nsim_dev->port_functions.sfnum_ida); } void nsim_dev_port_function_exit(struct nsim_dev *nsim_dev) { + WARN_ON(!ida_is_empty(&nsim_dev->port_functions.sfnum_ida)); + ida_destroy(&nsim_dev->port_functions.sfnum_ida); WARN_ON(!ida_is_empty(&nsim_dev->port_functions.pfnum_ida)); ida_destroy(&nsim_dev->port_functions.pfnum_ida); WARN_ON(!ida_is_empty(&nsim_dev->port_functions.ida)); @@ -119,9 +124,24 @@ nsim_devlink_port_function_alloc(struct nsim_dev *dev, const struct devlink_port goto fn_ida_err; port->pfnum = ret; break; + case DEVLINK_PORT_FLAVOUR_PCI_SF: + if (attrs->sfnum_valid) + ret = ida_alloc_range(&dev->port_functions.sfnum_ida, attrs->sfnum, + attrs->sfnum, GFP_KERNEL); + else + ret = ida_alloc(&dev->port_functions.sfnum_ida, GFP_KERNEL); + if (ret < 0) + goto fn_ida_err; + port->sfnum = ret; + port->pfnum = attrs->pfnum; + break; default: break; } + /* refcount_t is not needed as port is protected by port_functions.mutex. + * This count is to keep track of how many SF ports are attached a PF port. + */ + port->refcount = 1; return port; fn_ida_err: @@ -137,6 +157,9 @@ static void nsim_devlink_port_function_free(struct nsim_dev *dev, struct nsim_po case DEVLINK_PORT_FLAVOUR_PCI_PF: ida_simple_remove(&dev->port_functions.pfnum_ida, port->pfnum); break; + case DEVLINK_PORT_FLAVOUR_PCI_SF: + ida_simple_remove(&dev->port_functions.sfnum_ida, port->sfnum); + break; default: break; } @@ -170,6 +193,11 @@ nsim_dev_port_port_exists(struct nsim_dev *nsim_dev, const struct devlink_port_n if (attrs->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF && tmp->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF && tmp->pfnum == attrs->pfnum) return true; + + if (attrs->flavour == DEVLINK_PORT_FLAVOUR_PCI_SF && + tmp->flavour == DEVLINK_PORT_FLAVOUR_PCI_SF && + tmp->sfnum == attrs->sfnum && tmp->pfnum == attrs->pfnum) + return true; } return false; } @@ -183,21 +211,71 @@ nsim_dev_devlink_port_index_lookup(const struct nsim_dev *nsim_dev, unsigned int list_for_each_entry(port, &nsim_dev->port_functions.head, list) { if (port->port_index != port_index) continue; + if (port->refcount > 1) { + NL_SET_ERR_MSG_MOD(extack, "Port is in use"); + return ERR_PTR(-EBUSY); + } return port; } NL_SET_ERR_MSG_MOD(extack, "User created port not found"); return ERR_PTR(-ENOENT); } +static struct nsim_port_function * +pf_port_get(struct nsim_dev *nsim_dev, struct nsim_port_function *port) +{ + struct nsim_port_function *tmp; + + /* PF port addition doesn't need a parent. */ + if (port->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF) + return NULL; + + list_for_each_entry(tmp, &nsim_dev->port_functions.head, list) { + if (tmp->flavour != DEVLINK_PORT_FLAVOUR_PCI_PF || tmp->pfnum != port->pfnum) + continue; + + if (tmp->refcount + 1 == INT_MAX) + return ERR_PTR(-ENOSPC); + + port->pf_port = tmp; + tmp->refcount++; + return tmp; + } + return ERR_PTR(-ENOENT); +} + +static void pf_port_put(struct nsim_port_function *port) +{ + if (port->pf_port) { + port->pf_port->refcount--; + WARN_ON(port->pf_port->refcount < 0); + } + port->refcount--; + WARN_ON(port->refcount != 0); +} + static int nsim_devlink_port_function_add(struct devlink *devlink, struct nsim_dev *nsim_dev, struct nsim_port_function *port, struct netlink_ext_ack *extack) { + struct nsim_port_function *pf_port; int err; - list_add(&port->list, &nsim_dev->port_functions.head); + /* Keep all PF ports at the start, so that when driver is unloaded + * All SF ports from the end of the list can be removed first. + */ + if (port->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF) + list_add(&port->list, &nsim_dev->port_functions.head); + else + list_add_tail(&port->list, &nsim_dev->port_functions.head); + + pf_port = pf_port_get(nsim_dev, port); + if (IS_ERR(pf_port)) { + NL_SET_ERR_MSG_MOD(extack, "Fail to get pf port"); + err = PTR_ERR(pf_port); + goto pf_err; + } - port->state = DEVLINK_PORT_FUNCTION_STATE_INACTIVE; err = devlink_port_register(devlink, &port->dl_port, port->port_index); if (err) goto reg_err; @@ -213,6 +291,8 @@ static int nsim_devlink_port_function_add(struct devlink *devlink, struct nsim_d devlink_port_type_clear(&port->dl_port); devlink_port_unregister(&port->dl_port); reg_err: + pf_port_put(port); +pf_err: list_del(&port->list); return err; } @@ -224,12 +304,14 @@ static void nsim_devlink_port_function_del(struct nsim_dev *nsim_dev, unregister_netdev(port->netdev); devlink_port_unregister(&port->dl_port); list_del(&port->list); + pf_port_put(port); } static bool nsim_dev_port_flavour_supported(const struct nsim_dev *nsim_dev, const struct devlink_port_new_attrs *attrs) { - return attrs->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF; + return attrs->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF || + attrs->flavour == DEVLINK_PORT_FLAVOUR_PCI_SF; } int nsim_dev_devlink_port_new(struct devlink *devlink, const struct devlink_port_new_attrs *attrs, @@ -266,7 +348,11 @@ int nsim_dev_devlink_port_new(struct devlink *devlink, const struct devlink_port nsim_dev->switch_id.id_len); port->dl_port.attrs.switch_id.id_len = nsim_dev->switch_id.id_len; - devlink_port_attrs_pci_pf_set(&port->dl_port, port->controller, port->pfnum, false); + if (attrs->flavour == DEVLINK_PORT_FLAVOUR_PCI_PF) + devlink_port_attrs_pci_pf_set(&port->dl_port, port->controller, port->pfnum, false); + else + devlink_port_attrs_pci_sf_set(&port->dl_port, port->controller, port->pfnum, + port->sfnum, false); err = nsim_devlink_port_function_add(devlink, nsim_dev, port, extack); if (err) @@ -333,6 +419,7 @@ void nsim_dev_port_function_disable(struct nsim_dev *nsim_dev) * ports. */ + /* Remove SF ports first, followed by PF ports. */ list_for_each_entry_safe_reverse(port, tmp, &nsim_dev->port_functions.head, list) { nsim_devlink_port_function_del(nsim_dev, port); nsim_devlink_port_function_free(nsim_dev, port);