Message ID | 77EF4405DD4BB54AACCE7DB593DF6A9A9FD653@SJEXCHMB14.corp.ad.broadcom.com |
---|---|
State | Rejected, archived |
Delegated to: | David Miller |
Headers | show |
On Thu, Nov 12, 2015 at 04:02:18PM +0000, Premkumar Jonnala wrote: > Packet forwarding to/from bond interfaces is done in software. > > This patch enables certain platforms to bridge traffic to/from > bond interfaces in hardware. Notifications are sent out when > the "active" slave set for a bond interface is updated in > software. Platforms use the notifications to program the > hardware accordingly. The changes have been verified to work > with configured and 802.3ad bond interfaces. Hi Premkumar Nice to see this. Do you also have patches for a switch using these notification? Are you targeting Starfighter 2? Thanks Andrew -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 12/11/15 08:02, Premkumar Jonnala wrote: > Packet forwarding to/from bond interfaces is done in software. > > This patch enables certain platforms to bridge traffic to/from > bond interfaces in hardware. Notifications are sent out when > the "active" slave set for a bond interface is updated in > software. Platforms use the notifications to program the > hardware accordingly. The changes have been verified to work > with configured and 802.3ad bond interfaces. This is a good explanation of why you want the changes, and how this is implemented in a system utilizing that, but this is not documenting why you are making these changes to the bonding code, nor how they are supposed to be used by an implementor driver, since there is no such user posted (yet?). You introduce two new NDOs which are not documented in the commit message which would be nice to explain, in particular, why adding new NDOs and not switchdev attributes and methods for instance? Also, is it possible to move some of the logic into a notifier instead of having to maintain an array of slaves and an array of slaves to discard? > > Signed-off-by: Premkumar Jonnala <pjonnala@broadcom.com> > > --- > > diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c > index b4351ca..4b53733 100644 > --- a/drivers/net/bonding/bond_main.c > +++ b/drivers/net/bonding/bond_main.c > @@ -3759,6 +3759,101 @@ err: > bond_slave_arr_work_rearm(bond, 1); > } > > +static int slave_present(struct slave *slave, struct bond_up_slave *arr) > +{ > + int i; > + > + if (!arr) > + return 0; > + > + for (i = 0; i < arr->count; i++) { > + if (arr->arr[i] == slave) > + return 1; > + } > + return 0; > +} > + > +/* Send notification to clear/remove slaves for 'bond' in 'arr' except for > + * slaves in 'ignore_arr'. > + */ > +static int bond_slave_arr_clear_notify(struct bonding *bond, > + struct bond_up_slave *arr, > + struct bond_up_slave *ignore_arr) > +{ > + struct slave *slave; > + struct net_device *slave_dev; > + int i, rv; > + const struct net_device_ops *ops; > + > + if (!bond->dev || !arr) > + return -EINVAL; > + > + rv = 0; > + for (i = 0; i < arr->count; i++) { > + slave = arr->arr[i]; > + if (!slave || !slave->dev) > + continue; > + > + slave_dev = slave->dev; > + if (slave_present(slave, ignore_arr)) { > + netdev_dbg(bond->dev, "ignoring clear of slave %s\n", > + slave_dev->name); > + continue; > + } > + ops = slave_dev->netdev_ops; > + if (!ops || !ops->ndo_bond_slave_discard) { > + netdev_dbg(bond->dev, "No slave discard ops for %s\n", > + slave_dev->name); > + continue; > + } > + rv = ops->ndo_bond_slave_discard(slave_dev, bond->dev); > + if (rv < 0) > + return rv; > + } > + return rv; > +} > + > +/* Send notification about updated slaves for 'bond' except for slaves in > + * 'ignore_arr'. > + */ > +static int bond_slave_arr_set_notify(struct bonding *bond, > + struct bond_up_slave *ignore_arr) > +{ > + struct slave *slave; > + struct net_device *slave_dev; > + struct bond_up_slave *arr; > + int i, rv; > + const struct net_device_ops *ops; > + > + if (!bond || !bond->dev) > + return -EINVAL; > + rv = 0; > + > + arr = rtnl_dereference(bond->slave_arr); > + if (!arr) > + return -EINVAL; > + > + for (i = 0; i < arr->count; i++) { > + slave = arr->arr[i]; > + slave_dev = slave->dev; > + if (slave_present(slave, ignore_arr)) { > + netdev_dbg(bond->dev, "ignoring add of slave %s\n", > + slave->dev->name); > + continue; > + } > + ops = slave_dev->netdev_ops; > + if (!ops || !ops->ndo_bond_slave_add) { > + netdev_dbg(bond->dev, "No slave add ops for %s\n", > + slave_dev->name); > + continue; > + } > + rv = ops->ndo_bond_slave_add(slave_dev, bond->dev); > + if (rv < 0) > + return rv; > + } > + return rv; > +} > + > /* Build the usable slaves array in control path for modes that use xmit-hash > * to determine the slave interface - > * (a) BOND_MODE_8023AD > @@ -3771,7 +3866,7 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) > { > struct slave *slave; > struct list_head *iter; > - struct bond_up_slave *new_arr, *old_arr; > + struct bond_up_slave *new_arr, *old_arr, *discard_arr = 0; > int agg_id = 0; > int ret = 0; > > @@ -3786,6 +3881,12 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) > pr_err("Failed to build slave-array.\n"); > goto out; > } > + discard_arr = kzalloc(offsetof(struct bond_up_slave, arr[bond->slave_cnt]), > + GFP_KERNEL); > + if (!discard_arr) { > + ret = -ENOMEM; > + goto out; > + } > if (BOND_MODE(bond) == BOND_MODE_8023AD) { > struct ad_info ad_info; > > @@ -3797,6 +3898,7 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) > */ > old_arr = rtnl_dereference(bond->slave_arr); > if (old_arr) { > + bond_slave_arr_clear_notify(bond, old_arr, 0); > RCU_INIT_POINTER(bond->slave_arr, NULL); > kfree_rcu(old_arr, rcu); > } > @@ -3809,8 +3911,10 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) > struct aggregator *agg; > > agg = SLAVE_AD_INFO(slave)->port.aggregator; > - if (!agg || agg->aggregator_identifier != agg_id) > + if (!agg || agg->aggregator_identifier != agg_id) { > + discard_arr->arr[discard_arr->count++] = slave; > continue; > + } > } > if (!bond_slave_can_tx(slave)) > continue; > @@ -3820,10 +3924,15 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) > } > > old_arr = rtnl_dereference(bond->slave_arr); > + bond_slave_arr_clear_notify(bond, old_arr, new_arr); > + bond_slave_arr_clear_notify(bond, discard_arr, 0); > rcu_assign_pointer(bond->slave_arr, new_arr); > + bond_slave_arr_set_notify(bond, old_arr); > if (old_arr) > kfree_rcu(old_arr, rcu); > out: > + if (discard_arr) > + kfree(discard_arr); > if (ret != 0 && skipslave) { > int idx; > > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h > index 4ac653b..facc35f 100644 > --- a/include/linux/netdevice.h > +++ b/include/linux/netdevice.h > @@ -1236,6 +1236,10 @@ struct net_device_ops { > bool proto_down); > int (*ndo_fill_metadata_dst)(struct net_device *dev, > struct sk_buff *skb); > + int (*ndo_bond_slave_add)(struct net_device *slave_dev, > + struct net_device *bond); > + int (*ndo_bond_slave_discard)(struct net_device *slave_dev, > + struct net_device *bond); > }; > > /** > -- > To unsubscribe from this list: send the line "unsubscribe netdev" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html >
From: Florian Fainelli <f.fainelli@gmail.com> Date: Fri, 13 Nov 2015 10:38:48 -0800 > This is a good explanation of why you want the changes, and how this is > implemented in a system utilizing that, but this is not documenting why > you are making these changes to the bonding code, nor how they are > supposed to be used by an implementor driver, since there is no such > user posted (yet?). I am basically not even going to look at proposals for new device ops that don't also show an actual user of the new interfaces. That's not how we do development, and not providing a real example user of a new set of interfaces makes it impossible to properly review such changes. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Nov 12, 2015 at 04:02:18PM +0000, Premkumar Jonnala wrote: > Packet forwarding to/from bond interfaces is done in software. > > This patch enables certain platforms to bridge traffic to/from > bond interfaces in hardware. Notifications are sent out when > the "active" slave set for a bond interface is updated in > software. Platforms use the notifications to program the > hardware accordingly. Hi Premkumar I can think of three use cases of binding with hardware offload engines: 1) External user ports of the switch are bonded together into a trunk. 2) Host Ethernet ports connected to the switch are bonded together so you get double the bandwidth between the host and the switch. 3) In DSA setups where you have a cluster of switches, some switch ports connect to other switch ports, so forming the cluster. You can bond ports together to double the bandwidth between switches in the cluster. The requirements here are quite different in each case. Which of these uses cases are you trying to address? Thanks Andrew -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Thu, Nov 12, 2015 at 05:02:18PM CET, pjonnala@broadcom.com wrote: >Packet forwarding to/from bond interfaces is done in software. > >This patch enables certain platforms to bridge traffic to/from >bond interfaces in hardware. Notifications are sent out when >the "active" slave set for a bond interface is updated in >software. Platforms use the notifications to program the >hardware accordingly. The changes have been verified to work >with configured and 802.3ad bond interfaces. > >Signed-off-by: Premkumar Jonnala <pjonnala@broadcom.com> This patch is wrong, in many different acpects. Leaving the submission style, and no in-tree consumer aside, adding ndos for this thing is unacceptable. It should be handled as a part of switchdev attrs. Also, the solution should not be bonding-centric. I have a patchset in my queue which does this correctly, for bond and team using switchdev attr and with actual in-tree consumer, mlxsw driver. I plan to send that soon after net-next opens. Jiri -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Thu, Nov 12, 2015 at 06:08:01PM CET, andrew@lunn.ch wrote: >On Thu, Nov 12, 2015 at 04:02:18PM +0000, Premkumar Jonnala wrote: >> Packet forwarding to/from bond interfaces is done in software. >> >> This patch enables certain platforms to bridge traffic to/from >> bond interfaces in hardware. Notifications are sent out when >> the "active" slave set for a bond interface is updated in >> software. Platforms use the notifications to program the >> hardware accordingly. The changes have been verified to work >> with configured and 802.3ad bond interfaces. > >Hi Premkumar > >Nice to see this. Do you also have patches for a switch using these >notification? Are you targeting Starfighter 2? I fear they are targeting some closed-source crap :/ -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 15-11-14 01:39 AM, Jiri Pirko wrote: > Thu, Nov 12, 2015 at 05:02:18PM CET, pjonnala@broadcom.com wrote: >> Packet forwarding to/from bond interfaces is done in software. >> >> This patch enables certain platforms to bridge traffic to/from >> bond interfaces in hardware. Notifications are sent out when >> the "active" slave set for a bond interface is updated in >> software. Platforms use the notifications to program the >> hardware accordingly. The changes have been verified to work >> with configured and 802.3ad bond interfaces. >> >> Signed-off-by: Premkumar Jonnala <pjonnala@broadcom.com> > > This patch is wrong, in many different acpects. Leaving the submission > style, and no in-tree consumer aside, adding ndos for this thing is > unacceptable. It should be handled as a part of switchdev attrs. Why is it unacceptable? I think its at least worth debating. If I have a nic that can do bonding but none of the other switchdev things then implementing another ndo is certainly more straight forward. As it is heading many of the 10+Gbps nics may need to implement just enough of the switchdev infrastructure to get things like bonding up and working. Not necessarily a bad thing if we make the switchdev infrastructure light but does sort of make the name confusing if my nic is not doing any switching ;) Thanks, John > Also, the solution should not be bonding-centric. > > I have a patchset in my queue which does this correctly, for bond and team > using switchdev attr and with actual in-tree consumer, mlxsw driver. > I plan to send that soon after net-next opens. > > Jiri > -- > To unsubscribe from this list: send the line "unsubscribe netdev" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Sun, Nov 15, 2015 at 06:51:28AM CET, john.fastabend@gmail.com wrote: >On 15-11-14 01:39 AM, Jiri Pirko wrote: >> Thu, Nov 12, 2015 at 05:02:18PM CET, pjonnala@broadcom.com wrote: >>> Packet forwarding to/from bond interfaces is done in software. >>> >>> This patch enables certain platforms to bridge traffic to/from >>> bond interfaces in hardware. Notifications are sent out when >>> the "active" slave set for a bond interface is updated in >>> software. Platforms use the notifications to program the >>> hardware accordingly. The changes have been verified to work >>> with configured and 802.3ad bond interfaces. >>> >>> Signed-off-by: Premkumar Jonnala <pjonnala@broadcom.com> >> >> This patch is wrong, in many different acpects. Leaving the submission >> style, and no in-tree consumer aside, adding ndos for this thing is >> unacceptable. It should be handled as a part of switchdev attrs. > >Why is it unacceptable? I think its at least worth debating. If I >have a nic that can do bonding but none of the other switchdev >things then implementing another ndo is certainly more straight >forward. As it is heading many of the 10+Gbps nics may need to >implement just enough of the switchdev infrastructure to get things >like bonding up and working. Not necessarily a bad thing if we make >the switchdev infrastructure light but does sort of make the name >confusing if my nic is not doing any switching ;) Can you please describe what exaclty such a NIC functionality would look like? If there's not switching/forwarding, then the packets would go trought slow-path (kernel bonding/team driver). So why would we need to tell anything to driver/hw? Thanks. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> -----Original Message----- > From: Florian Fainelli [mailto:f.fainelli@gmail.com] > Sent: Saturday, November 14, 2015 12:09 AM > To: Premkumar Jonnala; netdev@vger.kernel.org; sfeldma@gmail.com; > jiri@resnulli.us; nikolay@cumulusnetworks.com; idosch@mellanox.com; > gospo@cumulusnetworks.com > Subject: Re: [PATCH] bonding: Offloading bonds to hardware > > On 12/11/15 08:02, Premkumar Jonnala wrote: > > Packet forwarding to/from bond interfaces is done in software. > > > > This patch enables certain platforms to bridge traffic to/from > > bond interfaces in hardware. Notifications are sent out when > > the "active" slave set for a bond interface is updated in > > software. Platforms use the notifications to program the > > hardware accordingly. The changes have been verified to work > > with configured and 802.3ad bond interfaces. > > This is a good explanation of why you want the changes, and how this is > implemented in a system utilizing that, but this is not documenting why > you are making these changes to the bonding code, nor how they are > supposed to be used by an implementor driver, since there is no such > user posted (yet?). Thank you for reading thru. In a system where forwarding happens in hardware, bonding interfaces need to be handled appropriately. Bonding interfaces should be treated as a single logical forwarding port, and traffic egressing bonding interface should be load balanced across the members. Packets ingress slave interface should be associated with appropriate bond interface for forwarding purposes. I will add more comments to the ndo/switchdev interfaces based on feedback. In short, the APIs associate/disassociate a slave with a bond interface. Typically, drivers program a "bonding table" in hardware that associates/disassociates a physical port with a bond. Learning, forwarding, etc. from then on consider the bond interface and not the physical interface. When a packet needs to egress a bond interface, a load balancing scheme in hardware figures out the slave the packet needs to be sent out on. Normally, a hash function that uses some fields from packet (MAC SA, MAC DA, ethertype, among others) are used to determine the slave out which the packet is sent. > > You introduce two new NDOs which are not documented in the commit > message which would be nice to explain, in particular, why adding new > NDOs and not switchdev attributes and methods for instance? I am open to changing the APIs to use the switchdev interface. I will send the diffs out shortly. As for commenting, I was following the coding/commenting style in the file. I am open to adding more comments. > > Also, is it possible to move some of the logic into a notifier instead > of having to maintain an array of slaves and an array of slaves to discard? Can you please elaborate? Bonding interfaces maintain an array of active slaves already. I've created another array, just to manage cleanup/updates to the slave set. For situations where the slave set does not change, or where some slaves stay across the slave-array update, I was trying to avoid a remove-slave-x followed by an immediate add-slave-x call. Avoiding unnecessary remove/add calls will help prevent traffic interruptions. Thanks Prem > > > > > Signed-off-by: Premkumar Jonnala <pjonnala@broadcom.com> > > > > --- > > > > diff --git a/drivers/net/bonding/bond_main.c > b/drivers/net/bonding/bond_main.c > > index b4351ca..4b53733 100644 > > --- a/drivers/net/bonding/bond_main.c > > +++ b/drivers/net/bonding/bond_main.c > > @@ -3759,6 +3759,101 @@ err: > > bond_slave_arr_work_rearm(bond, 1); > > } > > > > +static int slave_present(struct slave *slave, struct bond_up_slave *arr) > > +{ > > + int i; > > + > > + if (!arr) > > + return 0; > > + > > + for (i = 0; i < arr->count; i++) { > > + if (arr->arr[i] == slave) > > + return 1; > > + } > > + return 0; > > +} > > + > > +/* Send notification to clear/remove slaves for 'bond' in 'arr' except for > > + * slaves in 'ignore_arr'. > > + */ > > +static int bond_slave_arr_clear_notify(struct bonding *bond, > > + struct bond_up_slave *arr, > > + struct bond_up_slave *ignore_arr) > > +{ > > + struct slave *slave; > > + struct net_device *slave_dev; > > + int i, rv; > > + const struct net_device_ops *ops; > > + > > + if (!bond->dev || !arr) > > + return -EINVAL; > > + > > + rv = 0; > > + for (i = 0; i < arr->count; i++) { > > + slave = arr->arr[i]; > > + if (!slave || !slave->dev) > > + continue; > > + > > + slave_dev = slave->dev; > > + if (slave_present(slave, ignore_arr)) { > > + netdev_dbg(bond->dev, "ignoring clear of slave %s\n", > > + slave_dev->name); > > + continue; > > + } > > + ops = slave_dev->netdev_ops; > > + if (!ops || !ops->ndo_bond_slave_discard) { > > + netdev_dbg(bond->dev, "No slave discard ops for > %s\n", > > + slave_dev->name); > > + continue; > > + } > > + rv = ops->ndo_bond_slave_discard(slave_dev, bond->dev); > > + if (rv < 0) > > + return rv; > > + } > > + return rv; > > +} > > + > > +/* Send notification about updated slaves for 'bond' except for slaves in > > + * 'ignore_arr'. > > + */ > > +static int bond_slave_arr_set_notify(struct bonding *bond, > > + struct bond_up_slave *ignore_arr) > > +{ > > + struct slave *slave; > > + struct net_device *slave_dev; > > + struct bond_up_slave *arr; > > + int i, rv; > > + const struct net_device_ops *ops; > > + > > + if (!bond || !bond->dev) > > + return -EINVAL; > > + rv = 0; > > + > > + arr = rtnl_dereference(bond->slave_arr); > > + if (!arr) > > + return -EINVAL; > > + > > + for (i = 0; i < arr->count; i++) { > > + slave = arr->arr[i]; > > + slave_dev = slave->dev; > > + if (slave_present(slave, ignore_arr)) { > > + netdev_dbg(bond->dev, "ignoring add of slave %s\n", > > + slave->dev->name); > > + continue; > > + } > > + ops = slave_dev->netdev_ops; > > + if (!ops || !ops->ndo_bond_slave_add) { > > + netdev_dbg(bond->dev, "No slave add ops for %s\n", > > + slave_dev->name); > > + continue; > > + } > > + rv = ops->ndo_bond_slave_add(slave_dev, bond->dev); > > + if (rv < 0) > > + return rv; > > + } > > + return rv; > > +} > > + > > /* Build the usable slaves array in control path for modes that use xmit-hash > > * to determine the slave interface - > > * (a) BOND_MODE_8023AD > > @@ -3771,7 +3866,7 @@ int bond_update_slave_arr(struct bonding *bond, > struct slave *skipslave) > > { > > struct slave *slave; > > struct list_head *iter; > > - struct bond_up_slave *new_arr, *old_arr; > > + struct bond_up_slave *new_arr, *old_arr, *discard_arr = 0; > > int agg_id = 0; > > int ret = 0; > > > > @@ -3786,6 +3881,12 @@ int bond_update_slave_arr(struct bonding *bond, > struct slave *skipslave) > > pr_err("Failed to build slave-array.\n"); > > goto out; > > } > > + discard_arr = kzalloc(offsetof(struct bond_up_slave, arr[bond- > >slave_cnt]), > > + GFP_KERNEL); > > + if (!discard_arr) { > > + ret = -ENOMEM; > > + goto out; > > + } > > if (BOND_MODE(bond) == BOND_MODE_8023AD) { > > struct ad_info ad_info; > > > > @@ -3797,6 +3898,7 @@ int bond_update_slave_arr(struct bonding *bond, > struct slave *skipslave) > > */ > > old_arr = rtnl_dereference(bond->slave_arr); > > if (old_arr) { > > + bond_slave_arr_clear_notify(bond, old_arr, 0); > > RCU_INIT_POINTER(bond->slave_arr, NULL); > > kfree_rcu(old_arr, rcu); > > } > > @@ -3809,8 +3911,10 @@ int bond_update_slave_arr(struct bonding *bond, > struct slave *skipslave) > > struct aggregator *agg; > > > > agg = SLAVE_AD_INFO(slave)->port.aggregator; > > - if (!agg || agg->aggregator_identifier != agg_id) > > + if (!agg || agg->aggregator_identifier != agg_id) { > > + discard_arr->arr[discard_arr->count++] = slave; > > continue; > > + } > > } > > if (!bond_slave_can_tx(slave)) > > continue; > > @@ -3820,10 +3924,15 @@ int bond_update_slave_arr(struct bonding *bond, > struct slave *skipslave) > > } > > > > old_arr = rtnl_dereference(bond->slave_arr); > > + bond_slave_arr_clear_notify(bond, old_arr, new_arr); > > + bond_slave_arr_clear_notify(bond, discard_arr, 0); > > rcu_assign_pointer(bond->slave_arr, new_arr); > > + bond_slave_arr_set_notify(bond, old_arr); > > if (old_arr) > > kfree_rcu(old_arr, rcu); > > out: > > + if (discard_arr) > > + kfree(discard_arr); > > if (ret != 0 && skipslave) { > > int idx; > > > > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h > > index 4ac653b..facc35f 100644 > > --- a/include/linux/netdevice.h > > +++ b/include/linux/netdevice.h > > @@ -1236,6 +1236,10 @@ struct net_device_ops { > > bool proto_down); > > int (*ndo_fill_metadata_dst)(struct net_device > *dev, > > struct sk_buff *skb); > > + int (*ndo_bond_slave_add)(struct net_device *slave_dev, > > + struct net_device *bond); > > + int (*ndo_bond_slave_discard)(struct net_device > *slave_dev, > > + struct net_device *bond); > > }; > > > > /** > > -- > > To unsubscribe from this list: send the line "unsubscribe netdev" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > > > > -- > Florian
> -----Original Message----- > From: David Miller [mailto:davem@davemloft.net] > Sent: Saturday, November 14, 2015 12:40 AM > To: f.fainelli@gmail.com > Cc: Premkumar Jonnala; netdev@vger.kernel.org; sfeldma@gmail.com; > jiri@resnulli.us; nikolay@cumulusnetworks.com; idosch@mellanox.com; > gospo@cumulusnetworks.com > Subject: Re: [PATCH] bonding: Offloading bonds to hardware > > From: Florian Fainelli <f.fainelli@gmail.com> > Date: Fri, 13 Nov 2015 10:38:48 -0800 > > > This is a good explanation of why you want the changes, and how this is > > implemented in a system utilizing that, but this is not documenting why > > you are making these changes to the bonding code, nor how they are > > supposed to be used by an implementor driver, since there is no such > > user posted (yet?). > > I am basically not even going to look at proposals for new device ops > that don't also show an actual user of the new interfaces. > > That's not how we do development, and not providing a real example > user of a new set of interfaces makes it impossible to properly review > such changes. Thank you for the feedback David. I am currently handling/implementing these notifications in a custom (closed source) driver. I will update the diff with an actual demo of the handler, and resend the diffs. Thanks Prem -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> -----Original Message----- > From: Andrew Lunn [mailto:andrew@lunn.ch] > Sent: Saturday, November 14, 2015 2:42 AM > To: Premkumar Jonnala > Cc: netdev@vger.kernel.org > Subject: Re: [PATCH] bonding: Offloading bonds to hardware > > On Thu, Nov 12, 2015 at 04:02:18PM +0000, Premkumar Jonnala wrote: > > Packet forwarding to/from bond interfaces is done in software. > > > > This patch enables certain platforms to bridge traffic to/from > > bond interfaces in hardware. Notifications are sent out when > > the "active" slave set for a bond interface is updated in > > software. Platforms use the notifications to program the > > hardware accordingly. > > Hi Premkumar > > I can think of three use cases of binding with hardware offload > engines: Thank you for the comments Andrew. > > 1) External user ports of the switch are bonded together into a trunk. > > 2) Host Ethernet ports connected to the switch are bonded together so > you get double the bandwidth between the host and the switch. > > 3) In DSA setups where you have a cluster of switches, some switch > ports connect to other switch ports, so forming the cluster. You can > bond ports together to double the bandwidth between switches in the > cluster. > > The requirements here are quite different in each case. > Which of these uses cases are you trying to address? I am looking primarily at case (1) above, where traffic is forwarded in hardware. When bond interfaces are configured with some switch (hardware forwarded) ports as slaves, the switching hardware needs to handle the traffic appropriately based on where it's being received or sent. Thanks, -Prem > > Thanks > Andrew -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> > Thu, Nov 12, 2015 at 05:02:18PM CET, pjonnala@broadcom.com wrote: > >Packet forwarding to/from bond interfaces is done in software. > > > >This patch enables certain platforms to bridge traffic to/from > >bond interfaces in hardware. Notifications are sent out when > >the "active" slave set for a bond interface is updated in > >software. Platforms use the notifications to program the > >hardware accordingly. The changes have been verified to work > >with configured and 802.3ad bond interfaces. > > > >Signed-off-by: Premkumar Jonnala <pjonnala@broadcom.com> Thank you for the comments Jiri. > This patch is wrong, in many different acpects. Leaving the submission > style, and no in-tree consumer aside, adding ndos for this thing is > unacceptable. It should be handled as a part of switchdev attrs. > Also, the solution should not be bonding-centric. Can you elaborate on how you envision the solution to be, when you say the solution should not be "bonding centric"? Thanks Prem > > I have a patchset in my queue which does this correctly, for bond and team > using switchdev attr and with actual in-tree consumer, mlxsw driver. > I plan to send that soon after net-next opens. > > Jiri -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> -----Original Message----- > From: Premkumar Jonnala > Sent: Monday, November 16, 2015 11:42 AM > To: 'David Miller'; f.fainelli@gmail.com > Cc: netdev@vger.kernel.org; sfeldma@gmail.com; jiri@resnulli.us; > nikolay@cumulusnetworks.com; idosch@mellanox.com; > gospo@cumulusnetworks.com > Subject: RE: [PATCH] bonding: Offloading bonds to hardware > > > > > -----Original Message----- > > From: David Miller [mailto:davem@davemloft.net] > > Sent: Saturday, November 14, 2015 12:40 AM > > To: f.fainelli@gmail.com > > Cc: Premkumar Jonnala; netdev@vger.kernel.org; sfeldma@gmail.com; > > jiri@resnulli.us; nikolay@cumulusnetworks.com; idosch@mellanox.com; > > gospo@cumulusnetworks.com > > Subject: Re: [PATCH] bonding: Offloading bonds to hardware > > > > From: Florian Fainelli <f.fainelli@gmail.com> > > Date: Fri, 13 Nov 2015 10:38:48 -0800 > > > > > This is a good explanation of why you want the changes, and how this is > > > implemented in a system utilizing that, but this is not documenting why > > > you are making these changes to the bonding code, nor how they are > > > supposed to be used by an implementor driver, since there is no such > > > user posted (yet?). > > > > I am basically not even going to look at proposals for new device ops > > that don't also show an actual user of the new interfaces. > > > > That's not how we do development, and not providing a real example > > user of a new set of interfaces makes it impossible to properly review > > such changes. > > Thank you for the feedback David. I am currently handling/implementing > these notifications in a custom (closed source) driver. I will update the diff > with an actual demo of the handler, and resend the diffs. Clarification: When I say a "closed source" driver, I was implying a closed-source user-level application that programs hardware. Thanks Prem > > Thanks > Prem -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
From: Premkumar Jonnala <pjonnala@broadcom.com> Date: Mon, 16 Nov 2015 06:12:24 +0000 > Thank you for the feedback David. I am currently handling/implementing > these notifications in a custom (closed source) driver. I will update the diff > with an actual demo of the handler, and resend the diffs. No, I don't want a damn "demo", I want a real use in an upstream driver. You cannot add infrastructure that only has a real use in your out-of-tree or closed source driver. That is _completely_ and _totally_ unacceptable. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
From: Premkumar Jonnala <pjonnala@broadcom.com> Date: Mon, 16 Nov 2015 06:49:49 +0000 >> Thank you for the feedback David. I am currently handling/implementing >> these notifications in a custom (closed source) driver. I will update the diff >> with an actual demo of the handler, and resend the diffs. > > Clarification: When I say a "closed source" driver, I was implying a closed-source > user-level application that programs hardware. This makes no sense at all. You're adding kernel level hooks that some kernel level driver, upstream, has to make use of. A "closed source" user level component has no bearing whatsoever upon this. I want to see a kernel driver that's in the upstream tree now, implementing support for the new facility. Period. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Mon, Nov 16, 2015 at 07:48:34AM CET, pjonnala@broadcom.com wrote: >> >> Thu, Nov 12, 2015 at 05:02:18PM CET, pjonnala@broadcom.com wrote: >> >Packet forwarding to/from bond interfaces is done in software. >> > >> >This patch enables certain platforms to bridge traffic to/from >> >bond interfaces in hardware. Notifications are sent out when >> >the "active" slave set for a bond interface is updated in >> >software. Platforms use the notifications to program the >> >hardware accordingly. The changes have been verified to work >> >with configured and 802.3ad bond interfaces. >> > >> >Signed-off-by: Premkumar Jonnala <pjonnala@broadcom.com> > >Thank you for the comments Jiri. > >> This patch is wrong, in many different acpects. Leaving the submission >> style, and no in-tree consumer aside, adding ndos for this thing is >> unacceptable. It should be handled as a part of switchdev attrs. >> Also, the solution should not be bonding-centric. > >Can you elaborate on how you envision the solution to be, when you say >the solution should not be "bonding centric"? You should be able to offload team as well, using the same api. > >Thanks >Prem > >> >> I have a patchset in my queue which does this correctly, for bond and team >> using switchdev attr and with actual in-tree consumer, mlxsw driver. >> I plan to send that soon after net-next opens. >> >> Jiri -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 15-11-15 01:01 AM, Jiri Pirko wrote: > Sun, Nov 15, 2015 at 06:51:28AM CET, john.fastabend@gmail.com wrote: >> On 15-11-14 01:39 AM, Jiri Pirko wrote: >>> Thu, Nov 12, 2015 at 05:02:18PM CET, pjonnala@broadcom.com wrote: >>>> Packet forwarding to/from bond interfaces is done in software. >>>> >>>> This patch enables certain platforms to bridge traffic to/from >>>> bond interfaces in hardware. Notifications are sent out when >>>> the "active" slave set for a bond interface is updated in >>>> software. Platforms use the notifications to program the >>>> hardware accordingly. The changes have been verified to work >>>> with configured and 802.3ad bond interfaces. >>>> >>>> Signed-off-by: Premkumar Jonnala <pjonnala@broadcom.com> >>> >>> This patch is wrong, in many different acpects. Leaving the submission >>> style, and no in-tree consumer aside, adding ndos for this thing is >>> unacceptable. It should be handled as a part of switchdev attrs. >> >> Why is it unacceptable? I think its at least worth debating. If I >> have a nic that can do bonding but none of the other switchdev >> things then implementing another ndo is certainly more straight >> forward. As it is heading many of the 10+Gbps nics may need to >> implement just enough of the switchdev infrastructure to get things >> like bonding up and working. Not necessarily a bad thing if we make >> the switchdev infrastructure light but does sort of make the name >> confusing if my nic is not doing any switching ;) > > Can you please describe what exaclty such a NIC functionality would look > like? If there's not switching/forwarding, then the packets would go > trought slow-path (kernel bonding/team driver). So why would we need to > tell anything to driver/hw? > Mostly I was thinking about direct assigned functions into a container or VM. In this case the hardware may be able to create a virtual function or physical function that does bonding in hardware and exposes this to the OS. This seems a bit different then switchdev at the moment because functions are a PCIe concept and we don't have netdev for each VF necessarily in the host/hypervisor. Also the traffic endpoint is the host albeit a VM/container. The current situation is to create a ndo op to manage every knob on the functions by passing an index, > > int (*ndo_set_vf_mac)(struct net_device *dev, > int queue, u8 *mac); > int (*ndo_set_vf_vlan)(struct net_device *dev, > int queue, u16 vlan, u8 qos); > [...] at linux plumbers conference in Seattle some of us kicked around an idea to create "management" netdevs similar in style to what Florian(?) proposed with his L2 only netdevs. This might provide a mechanism to bring SR-IOV into the switchdev fold. But on the other hand I don't want to manage an edge NIC the same way as a switch but this doesn't necessarily mean the low level driver API can't be the same. So I think it needs more thought is all and also to your point SR-IOV does really imply some sort of switching/forwarding logic. It might be a good topic for netdev conference in Seville. Thanks! John > Thanks. > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b4351ca..4b53733 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3759,6 +3759,101 @@ err: bond_slave_arr_work_rearm(bond, 1); } +static int slave_present(struct slave *slave, struct bond_up_slave *arr) +{ + int i; + + if (!arr) + return 0; + + for (i = 0; i < arr->count; i++) { + if (arr->arr[i] == slave) + return 1; + } + return 0; +} + +/* Send notification to clear/remove slaves for 'bond' in 'arr' except for + * slaves in 'ignore_arr'. + */ +static int bond_slave_arr_clear_notify(struct bonding *bond, + struct bond_up_slave *arr, + struct bond_up_slave *ignore_arr) +{ + struct slave *slave; + struct net_device *slave_dev; + int i, rv; + const struct net_device_ops *ops; + + if (!bond->dev || !arr) + return -EINVAL; + + rv = 0; + for (i = 0; i < arr->count; i++) { + slave = arr->arr[i]; + if (!slave || !slave->dev) + continue; + + slave_dev = slave->dev; + if (slave_present(slave, ignore_arr)) { + netdev_dbg(bond->dev, "ignoring clear of slave %s\n", + slave_dev->name); + continue; + } + ops = slave_dev->netdev_ops; + if (!ops || !ops->ndo_bond_slave_discard) { + netdev_dbg(bond->dev, "No slave discard ops for %s\n", + slave_dev->name); + continue; + } + rv = ops->ndo_bond_slave_discard(slave_dev, bond->dev); + if (rv < 0) + return rv; + } + return rv; +} + +/* Send notification about updated slaves for 'bond' except for slaves in + * 'ignore_arr'. + */ +static int bond_slave_arr_set_notify(struct bonding *bond, + struct bond_up_slave *ignore_arr) +{ + struct slave *slave; + struct net_device *slave_dev; + struct bond_up_slave *arr; + int i, rv; + const struct net_device_ops *ops; + + if (!bond || !bond->dev) + return -EINVAL; + rv = 0; + + arr = rtnl_dereference(bond->slave_arr); + if (!arr) + return -EINVAL; + + for (i = 0; i < arr->count; i++) { + slave = arr->arr[i]; + slave_dev = slave->dev; + if (slave_present(slave, ignore_arr)) { + netdev_dbg(bond->dev, "ignoring add of slave %s\n", + slave->dev->name); + continue; + } + ops = slave_dev->netdev_ops; + if (!ops || !ops->ndo_bond_slave_add) { + netdev_dbg(bond->dev, "No slave add ops for %s\n", + slave_dev->name); + continue; + } + rv = ops->ndo_bond_slave_add(slave_dev, bond->dev); + if (rv < 0) + return rv; + } + return rv; +} + /* Build the usable slaves array in control path for modes that use xmit-hash * to determine the slave interface - * (a) BOND_MODE_8023AD @@ -3771,7 +3866,7 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) { struct slave *slave; struct list_head *iter; - struct bond_up_slave *new_arr, *old_arr; + struct bond_up_slave *new_arr, *old_arr, *discard_arr = 0; int agg_id = 0; int ret = 0; @@ -3786,6 +3881,12 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) pr_err("Failed to build slave-array.\n"); goto out; } + discard_arr = kzalloc(offsetof(struct bond_up_slave, arr[bond->slave_cnt]), + GFP_KERNEL); + if (!discard_arr) { + ret = -ENOMEM; + goto out; + } if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct ad_info ad_info; @@ -3797,6 +3898,7 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) */ old_arr = rtnl_dereference(bond->slave_arr); if (old_arr) { + bond_slave_arr_clear_notify(bond, old_arr, 0); RCU_INIT_POINTER(bond->slave_arr, NULL); kfree_rcu(old_arr, rcu); } @@ -3809,8 +3911,10 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) struct aggregator *agg; agg = SLAVE_AD_INFO(slave)->port.aggregator; - if (!agg || agg->aggregator_identifier != agg_id) + if (!agg || agg->aggregator_identifier != agg_id) { + discard_arr->arr[discard_arr->count++] = slave; continue; + } } if (!bond_slave_can_tx(slave)) continue; @@ -3820,10 +3924,15 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) } old_arr = rtnl_dereference(bond->slave_arr); + bond_slave_arr_clear_notify(bond, old_arr, new_arr); + bond_slave_arr_clear_notify(bond, discard_arr, 0); rcu_assign_pointer(bond->slave_arr, new_arr); + bond_slave_arr_set_notify(bond, old_arr); if (old_arr) kfree_rcu(old_arr, rcu); out: + if (discard_arr) + kfree(discard_arr); if (ret != 0 && skipslave) { int idx; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4ac653b..facc35f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1236,6 +1236,10 @@ struct net_device_ops { bool proto_down); int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb); + int (*ndo_bond_slave_add)(struct net_device *slave_dev, + struct net_device *bond); + int (*ndo_bond_slave_discard)(struct net_device *slave_dev, + struct net_device *bond); }; /**
Packet forwarding to/from bond interfaces is done in software. This patch enables certain platforms to bridge traffic to/from bond interfaces in hardware. Notifications are sent out when the "active" slave set for a bond interface is updated in software. Platforms use the notifications to program the hardware accordingly. The changes have been verified to work with configured and 802.3ad bond interfaces. Signed-off-by: Premkumar Jonnala <pjonnala@broadcom.com> --- -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html