Message ID | 20181018035917.19413-6-sam@mendozajonas.com |
---|---|
State | Changes Requested, archived |
Delegated to: | David Miller |
Headers | show |
Series | net/ncsi: Allow enabling multiple packages & channels | expand |
> +int ncsi_reset_dev(struct ncsi_dev *nd) > +{ > + struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); > + struct ncsi_channel *nc, *active; > + struct ncsi_package *np; > + unsigned long flags; > + bool enabled; > + int state; > + > + active = NULL; > + NCSI_FOR_EACH_PACKAGE(ndp, np) { > + NCSI_FOR_EACH_CHANNEL(np, nc) { > + spin_lock_irqsave(&nc->lock, flags); > + enabled = nc->monitor.enabled; > + state = nc->state; > + spin_unlock_irqrestore(&nc->lock, flags); > + > + if (enabled) > + ncsi_stop_channel_monitor(nc); > + if (state == NCSI_CHANNEL_ACTIVE) { > + active = nc; > + break; Is the original intention to process the channel one by one? If it is the case, there are two loops and we might need to use "goto found" instead. > + } > + } > + } > + found: ? > + if (!active) { > + /* Done */ > + spin_lock_irqsave(&ndp->lock, flags); > + ndp->flags &= ~NCSI_DEV_RESET; > + spin_unlock_irqrestore(&ndp->lock, flags); > + return ncsi_choose_active_channel(ndp); > + } > + > + spin_lock_irqsave(&ndp->lock, flags); > + ndp->flags |= NCSI_DEV_RESET; > + ndp->active_channel = active; > + ndp->active_package = active->package; > + spin_unlock_irqrestore(&ndp->lock, flags); > + > + nd->state = ncsi_dev_state_suspend; > + schedule_work(&ndp->work); > + return 0; > +} Also similar issue in ncsi_choose_active_channel() function below. > @@ -916,32 +1045,49 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp) > > ncm = &nc->modes[NCSI_MODE_LINK]; > if (ncm->data[2] & 0x1) { > - spin_unlock_irqrestore(&nc->lock, flags); > found = nc; > - goto out; > + with_link = true; > } > > - spin_unlock_irqrestore(&nc->lock, flags); > + /* If multi_channel is enabled configure all valid > + * channels whether or not they currently have link > + * so they will have AENs enabled. > + */ > + if (with_link || np->multi_channel) { I notice that there is a case that we will misconfigure the interface. For example below, multi-channel is not enable for package 1. But we enable the channel for ncsi2 below (package 1 channel 0) as that interface is the first channel for that package with link. cat /sys/kernel/debug/ncsi_protocol/ncsi_device_ IFIDX IFNAME NAME PID CID RX TX MP MC WP WC PC CS PS LS RU CR NQ HA ===================================================================== 2 eth2 ncsi0 000 000 1 1 1 1 1 1 0 2 1 1 1 1 0 1 2 eth2 ncsi1 000 001 1 0 1 1 1 1 0 2 1 1 1 1 0 1 2 eth2 ncsi2 001 000 1 0 1 0 1 1 0 2 1 1 1 1 0 1 2 eth2 ncsi3 001 001 0 0 1 0 1 1 0 1 0 1 1 1 0 1 ===================================================================== MP: Multi-mode Package WP: Whitelist Package MC: Multi-mode Channel WC: Whitelist Channel PC: Primary Channel CS: Channel State IA/A/IV 1/2/3 PS: Poll Status LS: Link Status RU: Running CR: Carrier OK NQ: Queue Stopped HA: Hardware Arbitration I temporally change to the following to avoid that. if ((with_link && !np->multi_channel && list_empty(&ndp->channel_queue)) || np->multi_channel) { > + spin_lock_irqsave(&ndp->lock, flags); > + list_add_tail_rcu(&nc->link, > + &ndp->channel_queue); > + spin_unlock_irqrestore(&ndp->lock, flags); > + > + netdev_dbg(ndp->ndev.dev, > + "NCSI: Channel %u added to queue (link %s)\n", > + nc->id, > + ncm->data[2] & 0x1 ? "up" : "down"); > + } > + > + spin_unlock_irqrestore(&nc->lock, cflags); > + > + if (with_link && !np->multi_channel) > + break; Similar issue here. As we are using break, so each package will configure one active TX. > } > + if (with_link && !ndp->multi_package) > + break; > } > > - if (!found) { > + if (list_empty(&ndp->channel_queue) && found) { > + netdev_info(ndp->ndev.dev, > + "NCSI: No channel with link found, configuring channel %u\n", > + found->id); > + spin_lock_irqsave(&ndp->lock, flags); > + list_add_tail_rcu(&found->link, &ndp->channel_queue); > + spin_unlock_irqrestore(&ndp->lock, flags); > + } else if (!found) { > netdev_warn(ndp->ndev.dev, > - "NCSI: No channel found with link\n"); > + "NCSI: No channel found to configure!\n"); > ncsi_report_link(ndp, true); > return -ENODEV; > } Also, for deselect package handler function, do we want to set to inactive here? If we just change the state, the cached data still keeps the old value. If the new ncsi_reset_dev() function is handling one by one, can we skip this part? static int ncsi_rsp_handler_dp(struct ncsi_request *nr) { struct ncsi_rsp_pkt *rsp; struct ncsi_dev_priv *ndp = nr->ndp; struct ncsi_package *np; struct ncsi_channel *nc; unsigned long flags; /* Find the package */ rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, &np, NULL); if (!np) return -ENODEV; /* Change state of all channels attached to the package */ NCSI_FOR_EACH_CHANNEL(np, nc) { spin_lock_irqsave(&nc->lock, flags); nc->state = NCSI_CHANNEL_INACTIVE; spin_unlock_irqrestore(&nc->lock, flags); } return 0; }
On Tue, 2018-10-30 at 21:26 +0000, Justin.Lee1@Dell.com wrote: > > +int ncsi_reset_dev(struct ncsi_dev *nd) > > +{ > > + struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); > > + struct ncsi_channel *nc, *active; > > + struct ncsi_package *np; > > + unsigned long flags; > > + bool enabled; > > + int state; > > + > > + active = NULL; > > + NCSI_FOR_EACH_PACKAGE(ndp, np) { > > + NCSI_FOR_EACH_CHANNEL(np, nc) { > > + spin_lock_irqsave(&nc->lock, flags); > > + enabled = nc->monitor.enabled; > > + state = nc->state; > > + spin_unlock_irqrestore(&nc->lock, flags); > > + > > + if (enabled) > > + ncsi_stop_channel_monitor(nc); > > + if (state == NCSI_CHANNEL_ACTIVE) { > > + active = nc; > > + break; > > Is the original intention to process the channel one by one? > If it is the case, there are two loops and we might need to use > "goto found" instead. Yes we'll need to break out of the package loop here as well. > > > + } > > + } > > + } > > + > > found: ? > > > + if (!active) { > > + /* Done */ > > + spin_lock_irqsave(&ndp->lock, flags); > > + ndp->flags &= ~NCSI_DEV_RESET; > > + spin_unlock_irqrestore(&ndp->lock, flags); > > + return ncsi_choose_active_channel(ndp); > > + } > > + > > + spin_lock_irqsave(&ndp->lock, flags); > > + ndp->flags |= NCSI_DEV_RESET; > > + ndp->active_channel = active; > > + ndp->active_package = active->package; > > + spin_unlock_irqrestore(&ndp->lock, flags); > > + > > + nd->state = ncsi_dev_state_suspend; > > + schedule_work(&ndp->work); > > + return 0; > > +} > > Also similar issue in ncsi_choose_active_channel() function below. > > > @@ -916,32 +1045,49 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp) > > > > ncm = &nc->modes[NCSI_MODE_LINK]; > > if (ncm->data[2] & 0x1) { > > - spin_unlock_irqrestore(&nc->lock, flags); > > found = nc; > > - goto out; > > + with_link = true; > > } > > > > - spin_unlock_irqrestore(&nc->lock, flags); > > + /* If multi_channel is enabled configure all valid > > + * channels whether or not they currently have link > > + * so they will have AENs enabled. > > + */ > > + if (with_link || np->multi_channel) { > > I notice that there is a case that we will misconfigure the interface. > For example below, multi-channel is not enable for package 1. > But we enable the channel for ncsi2 below (package 1 channel 0) as that interface is the first > channel for that package with link. I don't think I see the issue here; multi-channel is not set on package 1, but both channels are in the channel whitelist. Channel 0 is configured since it's the first found on package 1, and channel 1 is not since channel 0 is already found. Are you expecting something different? > > cat /sys/kernel/debug/ncsi_protocol/ncsi_device_ > IFIDX IFNAME NAME PID CID RX TX MP MC WP WC PC CS PS LS RU CR NQ HA > ===================================================================== > 2 eth2 ncsi0 000 000 1 1 1 1 1 1 0 2 1 1 1 1 0 1 > 2 eth2 ncsi1 000 001 1 0 1 1 1 1 0 2 1 1 1 1 0 1 > 2 eth2 ncsi2 001 000 1 0 1 0 1 1 0 2 1 1 1 1 0 1 > 2 eth2 ncsi3 001 001 0 0 1 0 1 1 0 1 0 1 1 1 0 1 > ===================================================================== > MP: Multi-mode Package WP: Whitelist Package > MC: Multi-mode Channel WC: Whitelist Channel > PC: Primary Channel CS: Channel State IA/A/IV 1/2/3 > PS: Poll Status LS: Link Status > RU: Running CR: Carrier OK > NQ: Queue Stopped HA: Hardware Arbitration > > I temporally change to the following to avoid that. > if ((with_link && > !np->multi_channel && > list_empty(&ndp->channel_queue)) || np->multi_channel) { > > > + spin_lock_irqsave(&ndp->lock, flags); > > + list_add_tail_rcu(&nc->link, > > + &ndp->channel_queue); > > + spin_unlock_irqrestore(&ndp->lock, flags); > > + > > + netdev_dbg(ndp->ndev.dev, > > + "NCSI: Channel %u added to queue (link %s)\n", > > + nc->id, > > + ncm->data[2] & 0x1 ? "up" : "down"); > > + } > > + > > + spin_unlock_irqrestore(&nc->lock, cflags); > > + > > + if (with_link && !np->multi_channel) > > + break; > > Similar issue here. As we are using break, so each package will configure one active TX. > I believe this is handled properly in ncsi_channel_is_tx() in the most recent revision. > > } > > + if (with_link && !ndp->multi_package) > > + break; > > } > > > > - if (!found) { > > + if (list_empty(&ndp->channel_queue) && found) { > > + netdev_info(ndp->ndev.dev, > > + "NCSI: No channel with link found, configuring channel %u\n", > > + found->id); > > + spin_lock_irqsave(&ndp->lock, flags); > > + list_add_tail_rcu(&found->link, &ndp->channel_queue); > > + spin_unlock_irqrestore(&ndp->lock, flags); > > + } else if (!found) { > > netdev_warn(ndp->ndev.dev, > > - "NCSI: No channel found with link\n"); > > + "NCSI: No channel found to configure!\n"); > > ncsi_report_link(ndp, true); > > return -ENODEV; > > } > > Also, for deselect package handler function, do we want to set to inactive here? > If we just change the state, the cached data still keeps the old value. If the new > ncsi_reset_dev() function is handling one by one, can we skip this part? Technically yes we could skip the state change here since ncsi_reset_dev() will have already done it. However if we send a DP command via some other means then it is probably best to ensure we treat all channels on that package as inactive. > > static int ncsi_rsp_handler_dp(struct ncsi_request *nr) > { > struct ncsi_rsp_pkt *rsp; > struct ncsi_dev_priv *ndp = nr->ndp; > struct ncsi_package *np; > struct ncsi_channel *nc; > unsigned long flags; > > /* Find the package */ > rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); > ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, > &np, NULL); > if (!np) > return -ENODEV; > > /* Change state of all channels attached to the package */ > NCSI_FOR_EACH_CHANNEL(np, nc) { > spin_lock_irqsave(&nc->lock, flags); > nc->state = NCSI_CHANNEL_INACTIVE; > > spin_unlock_irqrestore(&nc->lock, flags); > } > > return 0; > } > >
> On Tue, 2018-10-30 at 21:26 +0000, Justin.Lee1@Dell.com wrote: > > > +int ncsi_reset_dev(struct ncsi_dev *nd) > > > +{ > > > + struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); > > > + struct ncsi_channel *nc, *active; > > > + struct ncsi_package *np; > > > + unsigned long flags; > > > + bool enabled; > > > + int state; > > > + > > > + active = NULL; > > > + NCSI_FOR_EACH_PACKAGE(ndp, np) { > > > + NCSI_FOR_EACH_CHANNEL(np, nc) { > > > + spin_lock_irqsave(&nc->lock, flags); > > > + enabled = nc->monitor.enabled; > > > + state = nc->state; > > > + spin_unlock_irqrestore(&nc->lock, flags); > > > + > > > + if (enabled) > > > + ncsi_stop_channel_monitor(nc); > > > + if (state == NCSI_CHANNEL_ACTIVE) { > > > + active = nc; > > > + break; > > > > Is the original intention to process the channel one by one? > > If it is the case, there are two loops and we might need to use > > "goto found" instead. > > Yes we'll need to break out of the package loop here as well. > > > > > > + } > > > + } > > > + } > > > + > > > > found: ? > > > > > + if (!active) { > > > + /* Done */ > > > + spin_lock_irqsave(&ndp->lock, flags); > > > + ndp->flags &= ~NCSI_DEV_RESET; > > > + spin_unlock_irqrestore(&ndp->lock, flags); > > > + return ncsi_choose_active_channel(ndp); > > > + } > > > + > > > + spin_lock_irqsave(&ndp->lock, flags); > > > + ndp->flags |= NCSI_DEV_RESET; > > > + ndp->active_channel = active; > > > + ndp->active_package = active->package; > > > + spin_unlock_irqrestore(&ndp->lock, flags); > > > + > > > + nd->state = ncsi_dev_state_suspend; > > > + schedule_work(&ndp->work); > > > + return 0; > > > +} > > > > Also similar issue in ncsi_choose_active_channel() function below. > > > > > @@ -916,32 +1045,49 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp) > > > > > > ncm = &nc->modes[NCSI_MODE_LINK]; > > > if (ncm->data[2] & 0x1) { > > > - spin_unlock_irqrestore(&nc->lock, flags); > > > found = nc; > > > - goto out; > > > + with_link = true; > > > } > > > > > > - spin_unlock_irqrestore(&nc->lock, flags); > > > + /* If multi_channel is enabled configure all valid > > > + * channels whether or not they currently have link > > > + * so they will have AENs enabled. > > > + */ > > > + if (with_link || np->multi_channel) { > > > > I notice that there is a case that we will misconfigure the interface. > > For example below, multi-channel is not enable for package 1. > > But we enable the channel for ncsi2 below (package 1 channel 0) as that interface is the first > > channel for that package with link. > > I don't think I see the issue here; multi-channel is not set on package > 1, but both channels are in the channel whitelist. Channel 0 is > configured since it's the first found on package 1, and channel 1 is not > since channel 0 is already found. Are you expecting something different? > The setting is that multi-package is enable for both package 0 and 1. Multi-channel is only enabled for package 0. > > > > cat /sys/kernel/debug/ncsi_protocol/ncsi_device_ > > IFIDX IFNAME NAME PID CID RX TX MP MC WP WC PC CS PS LS RU CR NQ HA > > ===================================================================== > > 2 eth2 ncsi0 000 000 1 1 1 1 1 1 0 2 1 1 1 1 0 1 > > 2 eth2 ncsi1 000 001 1 0 1 1 1 1 0 2 1 1 1 1 0 1 > > 2 eth2 ncsi2 001 000 1 0 1 0 1 1 0 2 1 1 1 1 0 1 I was replying to the wrong old email and it might cause a bit confusion. The first 1 meaning channel is enabled for package 1 channel 0 (ncsi2). For eth2, we already has ncsi0 as the active channel with TX enable. I would think that package doesn't have the multi-channel enabled and we should not enable the channel for ncsi2. The problem is that package 1 doesn't enable the multi-channel and it believes it needs to enable one channel for its package but it doesn't aware that the other package already has one active channel. > > 2 eth2 ncsi3 001 001 0 0 1 0 1 1 0 1 0 1 1 1 0 1 > > ===================================================================== > > MP: Multi-mode Package WP: Whitelist Package > > MC: Multi-mode Channel WC: Whitelist Channel > > PC: Primary Channel CS: Channel State IA/A/IV 1/2/3 > > PS: Poll Status LS: Link Status > > RU: Running CR: Carrier OK > > NQ: Queue Stopped HA: Hardware Arbitration > > > > I temporally change to the following to avoid that. > > if ((with_link && > > !np->multi_channel && > > list_empty(&ndp->channel_queue)) || np->multi_channel) { > > > > > + spin_lock_irqsave(&ndp->lock, flags); > > > + list_add_tail_rcu(&nc->link, > > > + &ndp->channel_queue); > > > + spin_unlock_irqrestore(&ndp->lock, flags); > > > + > > > + netdev_dbg(ndp->ndev.dev, > > > + "NCSI: Channel %u added to queue (link %s)\n", > > > + nc->id, > > > + ncm->data[2] & 0x1 ? "up" : "down"); > > > + } > > > + > > > + spin_unlock_irqrestore(&nc->lock, cflags); > > > + > > > + if (with_link && !np->multi_channel) > > > + break; > > > > Similar issue here. As we are using break, so each package will configure one active TX. > > > > I believe this is handled properly in ncsi_channel_is_tx() in the most > recent revision. I saw this issue with the last revision. I was using the wrong email to reply. > > > > } > > > + if (with_link && !ndp->multi_package) > > > + break; > > > } > > > > > > - if (!found) { > > > + if (list_empty(&ndp->channel_queue) && found) { > > > + netdev_info(ndp->ndev.dev, > > > + "NCSI: No channel with link found, configuring channel %u\n", > > > + found->id); > > > + spin_lock_irqsave(&ndp->lock, flags); > > > + list_add_tail_rcu(&found->link, &ndp->channel_queue); > > > + spin_unlock_irqrestore(&ndp->lock, flags); > > > + } else if (!found) { > > > netdev_warn(ndp->ndev.dev, > > > - "NCSI: No channel found with link\n"); > > > + "NCSI: No channel found to configure!\n"); > > > ncsi_report_link(ndp, true); > > > return -ENODEV; > > > } > > > > Also, for deselect package handler function, do we want to set to inactive here? > > If we just change the state, the cached data still keeps the old value. If the new > > ncsi_reset_dev() function is handling one by one, can we skip this part? > > Technically yes we could skip the state change here since > ncsi_reset_dev() will have already done it. However if we send a DP > command via some other means then it is probably best to ensure we treat > all channels on that package as inactive. When I tested, if I didn't comment out the state change in response handler, ncsi_reset_dev() function will not handle properly and some channels got into invisible state and at the end we lost those selectable channels. > > > > > static int ncsi_rsp_handler_dp(struct ncsi_request *nr) > > { > > struct ncsi_rsp_pkt *rsp; > > struct ncsi_dev_priv *ndp = nr->ndp; > > struct ncsi_package *np; > > struct ncsi_channel *nc; > > unsigned long flags; > > > > /* Find the package */ > > rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); > > ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, > > &np, NULL); > > if (!np) > > return -ENODEV; > > > > /* Change state of all channels attached to the package */ > > NCSI_FOR_EACH_CHANNEL(np, nc) { > > spin_lock_irqsave(&nc->lock, flags); > > nc->state = NCSI_CHANNEL_INACTIVE; > > > > spin_unlock_irqrestore(&nc->lock, flags); > > } > > > > return 0; > > } > > > >
On Mon, 2018-11-05 at 18:01 +0000, Justin.Lee1@Dell.com wrote: > > On Tue, 2018-10-30 at 21:26 +0000, Justin.Lee1@Dell.com wrote: > > > > +int ncsi_reset_dev(struct ncsi_dev *nd) > > > > +{ > > > > + struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); > > > > + struct ncsi_channel *nc, *active; > > > > + struct ncsi_package *np; > > > > + unsigned long flags; > > > > + bool enabled; > > > > + int state; > > > > + > > > > + active = NULL; > > > > + NCSI_FOR_EACH_PACKAGE(ndp, np) { > > > > + NCSI_FOR_EACH_CHANNEL(np, nc) { > > > > + spin_lock_irqsave(&nc->lock, flags); > > > > + enabled = nc->monitor.enabled; > > > > + state = nc->state; > > > > + spin_unlock_irqrestore(&nc->lock, flags); > > > > + > > > > + if (enabled) > > > > + ncsi_stop_channel_monitor(nc); > > > > + if (state == NCSI_CHANNEL_ACTIVE) { > > > > + active = nc; > > > > + break; > > > > > > Is the original intention to process the channel one by one? > > > If it is the case, there are two loops and we might need to use > > > "goto found" instead. > > > > Yes we'll need to break out of the package loop here as well. > > > > > > + } > > > > + } > > > > + } > > > > + > > > > > > found: ? > > > > > > > + if (!active) { > > > > + /* Done */ > > > > + spin_lock_irqsave(&ndp->lock, flags); > > > > + ndp->flags &= ~NCSI_DEV_RESET; > > > > + spin_unlock_irqrestore(&ndp->lock, flags); > > > > + return ncsi_choose_active_channel(ndp); > > > > + } > > > > + > > > > + spin_lock_irqsave(&ndp->lock, flags); > > > > + ndp->flags |= NCSI_DEV_RESET; > > > > + ndp->active_channel = active; > > > > + ndp->active_package = active->package; > > > > + spin_unlock_irqrestore(&ndp->lock, flags); > > > > + > > > > + nd->state = ncsi_dev_state_suspend; > > > > + schedule_work(&ndp->work); > > > > + return 0; > > > > +} > > > > > > Also similar issue in ncsi_choose_active_channel() function below. > > > > > > > @@ -916,32 +1045,49 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp) > > > > > > > > ncm = &nc->modes[NCSI_MODE_LINK]; > > > > if (ncm->data[2] & 0x1) { > > > > - spin_unlock_irqrestore(&nc->lock, flags); > > > > found = nc; > > > > - goto out; > > > > + with_link = true; > > > > } > > > > > > > > - spin_unlock_irqrestore(&nc->lock, flags); > > > > + /* If multi_channel is enabled configure all valid > > > > + * channels whether or not they currently have link > > > > + * so they will have AENs enabled. > > > > + */ > > > > + if (with_link || np->multi_channel) { > > > > > > I notice that there is a case that we will misconfigure the interface. > > > For example below, multi-channel is not enable for package 1. > > > But we enable the channel for ncsi2 below (package 1 channel 0) as that interface is the first > > > channel for that package with link. > > > > I don't think I see the issue here; multi-channel is not set on package > > 1, but both channels are in the channel whitelist. Channel 0 is > > configured since it's the first found on package 1, and channel 1 is not > > since channel 0 is already found. Are you expecting something different? > > > > The setting is that multi-package is enable for both package 0 and 1. > Multi-channel is only enabled for package 0. > > > > cat /sys/kernel/debug/ncsi_protocol/ncsi_device_ > > > IFIDX IFNAME NAME PID CID RX TX MP MC WP WC PC CS PS LS RU CR NQ HA > > > ===================================================================== > > > 2 eth2 ncsi0 000 000 1 1 1 1 1 1 0 2 1 1 1 1 0 1 > > > 2 eth2 ncsi1 000 001 1 0 1 1 1 1 0 2 1 1 1 1 0 1 > > > 2 eth2 ncsi2 001 000 1 0 1 0 1 1 0 2 1 1 1 1 0 1 > > I was replying to the wrong old email and it might cause a bit confusion. > The first 1 meaning channel is enabled for package 1 channel 0 (ncsi2). > For eth2, we already has ncsi0 as the active channel with TX enable. > I would think that package doesn't have the multi-channel enabled and > we should not enable the channel for ncsi2. The problem is that package 1 doesn't > enable the multi-channel and it believes it needs to enable one channel for its package > but it doesn't aware that the other package already has one active channel. Ah, maybe the confusion here is that multi_channel is a per-package setting; it determines what a package does with its own channels. So you have package 0 with multi-channel enabled so it enables channels 0 & 1. Then you have package 1 without multi-channel so it enables only channel 0. There is still only one Tx channel (package 0, channel 0). Does that sound right, or have I missed something? > > > > 2 eth2 ncsi3 001 001 0 0 1 0 1 1 0 1 0 1 1 1 0 1 > > > ===================================================================== > > > MP: Multi-mode Package WP: Whitelist Package > > > MC: Multi-mode Channel WC: Whitelist Channel > > > PC: Primary Channel CS: Channel State IA/A/IV 1/2/3 > > > PS: Poll Status LS: Link Status > > > RU: Running CR: Carrier OK > > > NQ: Queue Stopped HA: Hardware Arbitration > > > > > > I temporally change to the following to avoid that. > > > if ((with_link && > > > !np->multi_channel && > > > list_empty(&ndp->channel_queue)) || np->multi_channel) { > > > > > > > + spin_lock_irqsave(&ndp->lock, flags); > > > > + list_add_tail_rcu(&nc->link, > > > > + &ndp->channel_queue); > > > > + spin_unlock_irqrestore(&ndp->lock, flags); > > > > + > > > > + netdev_dbg(ndp->ndev.dev, > > > > + "NCSI: Channel %u added to queue (link %s)\n", > > > > + nc->id, > > > > + ncm->data[2] & 0x1 ? "up" : "down"); > > > > + } > > > > + > > > > + spin_unlock_irqrestore(&nc->lock, cflags); > > > > + > > > > + if (with_link && !np->multi_channel) > > > > + break; > > > > > > Similar issue here. As we are using break, so each package will configure one active TX. > > > > > > > I believe this is handled properly in ncsi_channel_is_tx() in the most > > recent revision. > > I saw this issue with the last revision. I was using the wrong email to reply. > > > > > } > > > > + if (with_link && !ndp->multi_package) > > > > + break; > > > > } > > > > > > > > - if (!found) { > > > > + if (list_empty(&ndp->channel_queue) && found) { > > > > + netdev_info(ndp->ndev.dev, > > > > + "NCSI: No channel with link found, configuring channel %u\n", > > > > + found->id); > > > > + spin_lock_irqsave(&ndp->lock, flags); > > > > + list_add_tail_rcu(&found->link, &ndp->channel_queue); > > > > + spin_unlock_irqrestore(&ndp->lock, flags); > > > > + } else if (!found) { > > > > netdev_warn(ndp->ndev.dev, > > > > - "NCSI: No channel found with link\n"); > > > > + "NCSI: No channel found to configure!\n"); > > > > ncsi_report_link(ndp, true); > > > > return -ENODEV; > > > > } > > > > > > Also, for deselect package handler function, do we want to set to inactive here? > > > If we just change the state, the cached data still keeps the old value. If the new > > > ncsi_reset_dev() function is handling one by one, can we skip this part? > > > > Technically yes we could skip the state change here since > > ncsi_reset_dev() will have already done it. However if we send a DP > > command via some other means then it is probably best to ensure we treat > > all channels on that package as inactive. > > When I tested, if I didn't comment out the state change in response handler, > ncsi_reset_dev() function will not handle properly and some channels got into > invisible state and at the end we lost those selectable channels. Well that's not good :) The deselect package command should only be sent once all channels on a package have become inactive, so I wouldn't expect it to interfere with ncsi_reset_dev(), but perhaps this gets combined with the issues sending netlink commands close together. I'll investigate, if I'm lucky this will be resolved with the same fixes. > > > > static int ncsi_rsp_handler_dp(struct ncsi_request *nr) > > > { > > > struct ncsi_rsp_pkt *rsp; > > > struct ncsi_dev_priv *ndp = nr->ndp; > > > struct ncsi_package *np; > > > struct ncsi_channel *nc; > > > unsigned long flags; > > > > > > /* Find the package */ > > > rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); > > > ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, > > > &np, NULL); > > > if (!np) > > > return -ENODEV; > > > > > > /* Change state of all channels attached to the package */ > > > NCSI_FOR_EACH_CHANNEL(np, nc) { > > > spin_lock_irqsave(&nc->lock, flags); > > > nc->state = NCSI_CHANNEL_INACTIVE; > > > > > > spin_unlock_irqrestore(&nc->lock, flags); > > > } > > > > > > return 0; > > > } > > > > > > > >
> On Mon, 2018-11-05 at 18:01 +0000, Justin.Lee1@Dell.com wrote: > > > On Tue, 2018-10-30 at 21:26 +0000, Justin.Lee1@Dell.com wrote: > > > > > +int ncsi_reset_dev(struct ncsi_dev *nd) > > > > > +{ > > > > > + struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); > > > > > + struct ncsi_channel *nc, *active; > > > > > + struct ncsi_package *np; > > > > > + unsigned long flags; > > > > > + bool enabled; > > > > > + int state; > > > > > + > > > > > + active = NULL; > > > > > + NCSI_FOR_EACH_PACKAGE(ndp, np) { > > > > > + NCSI_FOR_EACH_CHANNEL(np, nc) { > > > > > + spin_lock_irqsave(&nc->lock, flags); > > > > > + enabled = nc->monitor.enabled; > > > > > + state = nc->state; > > > > > + spin_unlock_irqrestore(&nc->lock, flags); > > > > > + > > > > > + if (enabled) > > > > > + ncsi_stop_channel_monitor(nc); > > > > > + if (state == NCSI_CHANNEL_ACTIVE) { > > > > > + active = nc; > > > > > + break; > > > > > > > > Is the original intention to process the channel one by one? > > > > If it is the case, there are two loops and we might need to use > > > > "goto found" instead. > > > > > > Yes we'll need to break out of the package loop here as well. > > > > > > > > + } > > > > > + } > > > > > + } > > > > > + > > > > > > > > found: ? > > > > > > > > > + if (!active) { > > > > > + /* Done */ > > > > > + spin_lock_irqsave(&ndp->lock, flags); > > > > > + ndp->flags &= ~NCSI_DEV_RESET; > > > > > + spin_unlock_irqrestore(&ndp->lock, flags); > > > > > + return ncsi_choose_active_channel(ndp); > > > > > + } > > > > > + > > > > > + spin_lock_irqsave(&ndp->lock, flags); > > > > > + ndp->flags |= NCSI_DEV_RESET; > > > > > + ndp->active_channel = active; > > > > > + ndp->active_package = active->package; > > > > > + spin_unlock_irqrestore(&ndp->lock, flags); > > > > > + > > > > > + nd->state = ncsi_dev_state_suspend; > > > > > + schedule_work(&ndp->work); > > > > > + return 0; > > > > > +} > > > > > > > > Also similar issue in ncsi_choose_active_channel() function below. > > > > > > > > > @@ -916,32 +1045,49 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp) > > > > > > > > > > ncm = &nc->modes[NCSI_MODE_LINK]; > > > > > if (ncm->data[2] & 0x1) { > > > > > - spin_unlock_irqrestore(&nc->lock, flags); > > > > > found = nc; > > > > > - goto out; > > > > > + with_link = true; > > > > > } > > > > > > > > > > - spin_unlock_irqrestore(&nc->lock, flags); > > > > > + /* If multi_channel is enabled configure all valid > > > > > + * channels whether or not they currently have link > > > > > + * so they will have AENs enabled. > > > > > + */ > > > > > + if (with_link || np->multi_channel) { > > > > > > > > I notice that there is a case that we will misconfigure the interface. > > > > For example below, multi-channel is not enable for package 1. > > > > But we enable the channel for ncsi2 below (package 1 channel 0) as that interface is the first > > > > channel for that package with link. > > > > > > I don't think I see the issue here; multi-channel is not set on package > > > 1, but both channels are in the channel whitelist. Channel 0 is > > > configured since it's the first found on package 1, and channel 1 is not > > > since channel 0 is already found. Are you expecting something different? > > > > > > > The setting is that multi-package is enable for both package 0 and 1. > > Multi-channel is only enabled for package 0. > > > > > > cat /sys/kernel/debug/ncsi_protocol/ncsi_device_ > > > > IFIDX IFNAME NAME PID CID RX TX MP MC WP WC PC CS PS LS RU CR NQ HA > > > > ===================================================================== > > > > 2 eth2 ncsi0 000 000 1 1 1 1 1 1 0 2 1 1 1 1 0 1 > > > > 2 eth2 ncsi1 000 001 1 0 1 1 1 1 0 2 1 1 1 1 0 1 > > > > 2 eth2 ncsi2 001 000 1 0 1 0 1 1 0 2 1 1 1 1 0 1 > > > > I was replying to the wrong old email and it might cause a bit confusion. > > The first 1 meaning channel is enabled for package 1 channel 0 (ncsi2). > > For eth2, we already has ncsi0 as the active channel with TX enable. > > I would think that package doesn't have the multi-channel enabled and > > we should not enable the channel for ncsi2. The problem is that package 1 doesn't > > enable the multi-channel and it believes it needs to enable one channel for its package > > but it doesn't aware that the other package already has one active channel. > > Ah, maybe the confusion here is that multi_channel is a per-package > setting; it determines what a package does with its own channels. > > So you have package 0 with multi-channel enabled so it enables channels 0 > & 1. > Then you have package 1 without multi-channel so it enables only channel > 0. > There is still only one Tx channel (package 0, channel 0). > > Does that sound right, or have I missed something? Yes, you are right. There is only one TX enabled. If we can hold off a few seconds before applying, then we will not see these configuration changes in between the back to back netlink commands. Thanks, Justin
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 592e3cd40728..ed68ca677e41 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -279,6 +279,7 @@ struct ncsi_dev_priv { #define NCSI_DEV_PROBED 1 /* Finalized NCSI topology */ #define NCSI_DEV_HWA 2 /* Enabled HW arbitration */ #define NCSI_DEV_RESHUFFLE 4 +#define NCSI_DEV_RESET 8 /* Reset state of NC */ spinlock_t lock; /* Protect the NCSI device */ #if IS_ENABLED(CONFIG_IPV6) unsigned int inet6_addr_num; /* Number of IPv6 addresses */ @@ -333,6 +334,7 @@ extern spinlock_t ncsi_dev_lock; list_for_each_entry_rcu(nc, &np->channels, node) /* Resources */ +int ncsi_reset_dev(struct ncsi_dev *nd); void ncsi_start_channel_monitor(struct ncsi_channel *nc); void ncsi_stop_channel_monitor(struct ncsi_channel *nc); struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np, diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 632caeea672f..af23e2cc6c1d 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -550,8 +550,10 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) spin_lock_irqsave(&nc->lock, flags); nc->state = NCSI_CHANNEL_INACTIVE; spin_unlock_irqrestore(&nc->lock, flags); - ncsi_process_next_channel(ndp); - + if (ndp->flags & NCSI_DEV_RESET) + ncsi_reset_dev(nd); + else + ncsi_process_next_channel(ndp); break; default: netdev_warn(nd->dev, "Wrong NCSI state 0x%x in suspend\n", @@ -1472,7 +1474,7 @@ int ncsi_start_dev(struct ncsi_dev *nd) return 0; } - return ncsi_choose_active_channel(nd); + return ncsi_reset_dev(nd); } EXPORT_SYMBOL_GPL(ncsi_start_dev); @@ -1485,7 +1487,10 @@ void ncsi_stop_dev(struct ncsi_dev *nd) int old_state; unsigned long flags; - /* Stop the channel monitor and reset channel's state */ + /* Stop the channel monitor on any active channels. Don't reset the + * channel state so we know which were active when ncsi_start_dev() + * is next called. + */ NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { ncsi_stop_channel_monitor(nc); @@ -1493,7 +1498,6 @@ void ncsi_stop_dev(struct ncsi_dev *nd) spin_lock_irqsave(&nc->lock, flags); chained = !list_empty(&nc->link); old_state = nc->state; - nc->state = NCSI_CHANNEL_INACTIVE; spin_unlock_irqrestore(&nc->lock, flags); WARN_ON_ONCE(chained || @@ -1506,6 +1510,51 @@ void ncsi_stop_dev(struct ncsi_dev *nd) } EXPORT_SYMBOL_GPL(ncsi_stop_dev); +int ncsi_reset_dev(struct ncsi_dev *nd) +{ + struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); + struct ncsi_channel *nc, *active; + struct ncsi_package *np; + unsigned long flags; + bool enabled; + int state; + + active = NULL; + NCSI_FOR_EACH_PACKAGE(ndp, np) { + NCSI_FOR_EACH_CHANNEL(np, nc) { + spin_lock_irqsave(&nc->lock, flags); + enabled = nc->monitor.enabled; + state = nc->state; + spin_unlock_irqrestore(&nc->lock, flags); + + if (enabled) + ncsi_stop_channel_monitor(nc); + if (state == NCSI_CHANNEL_ACTIVE) { + active = nc; + break; + } + } + } + + if (!active) { + /* Done */ + spin_lock_irqsave(&ndp->lock, flags); + ndp->flags &= ~NCSI_DEV_RESET; + spin_unlock_irqrestore(&ndp->lock, flags); + return ncsi_choose_active_channel(ndp); + } + + spin_lock_irqsave(&ndp->lock, flags); + ndp->flags |= NCSI_DEV_RESET; + ndp->active_channel = active; + ndp->active_package = active->package; + spin_unlock_irqrestore(&ndp->lock, flags); + + nd->state = ncsi_dev_state_suspend; + schedule_work(&ndp->work); + return 0; +} + void ncsi_unregister_dev(struct ncsi_dev *nd) { struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index 33314381b4f5..06bb8bc2c798 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -330,9 +330,8 @@ static int ncsi_set_interface_nl(struct sk_buff *msg, struct genl_info *info) package_id, channel_id, channel_id == NCSI_RESERVED_CHANNEL ? " (any)" : ""); - /* Bounce the NCSI channel to set changes */ - ncsi_stop_dev(&ndp->ndev); - ncsi_start_dev(&ndp->ndev); + /* Update channel configuration */ + ncsi_reset_dev(&ndp->ndev); return 0; } @@ -360,9 +359,8 @@ static int ncsi_clear_interface_nl(struct sk_buff *msg, struct genl_info *info) spin_unlock_irqrestore(&ndp->lock, flags); netdev_info(ndp->ndev.dev, "NCSI: Cleared preferred package/channel\n"); - /* Bounce the NCSI channel to set changes */ - ncsi_stop_dev(&ndp->ndev); - ncsi_start_dev(&ndp->ndev); + /* Update channel configuration */ + ncsi_reset_dev(&ndp->ndev); return 0; }
When the NCSI driver is stopped with ncsi_stop_dev() the channel monitors are stopped and the state set to "inactive". However the channels are still configured and active from the perspective of the network controller. We should suspend each active channel but in the context of ncsi_stop_dev() the transmit queue has been or is about to be stopped so we won't have time to do so. Instead when ncsi_start_dev() is called if the NCSI topology has already been probed then call ncsi_reset_dev() to suspend any channels that were previously active. This resets the network controller to a known state, provides an up to date view of channel link state, and makes sure that mode flags such as NCSI_MODE_TX_ENABLE are properly reset. In addition to ncsi_start_dev() use ncsi_reset_dev() in ncsi-netlink.c to update the channel configuration more cleanly. Signed-off-by: Samuel Mendoza-Jonas <sam@mendozajonas.com> --- net/ncsi/internal.h | 2 ++ net/ncsi/ncsi-manage.c | 59 +++++++++++++++++++++++++++++++++++++---- net/ncsi/ncsi-netlink.c | 10 +++---- 3 files changed, 60 insertions(+), 11 deletions(-)