diff mbox series

[net-next,v2,5/6] net/ncsi: Reset channel state in ncsi_start_dev()

Message ID 20181023215201.27315-6-sam@mendozajonas.com
State Deferred, archived
Delegated to: David Miller
Headers show
Series net/ncsi: Allow enabling multiple packages & channels | expand

Commit Message

Sam Mendoza-Jonas Oct. 23, 2018, 9:52 p.m. UTC
When the NCSI driver is stopped with ncsi_stop_dev() the channel
monitors are stopped and the state set to "inactive". However the
channels are still configured and active from the perspective of the
network controller. We should suspend each active channel but in the
context of ncsi_stop_dev() the transmit queue has been or is about to be
stopped so we won't have time to do so.

Instead when ncsi_start_dev() is called if the NCSI topology has already
been probed then call ncsi_reset_dev() to suspend any channels that were
previously active. This resets the network controller to a known state,
provides an up to date view of channel link state, and makes sure that
mode flags such as NCSI_MODE_TX_ENABLE are properly reset.

In addition to ncsi_start_dev() use ncsi_reset_dev() in ncsi-netlink.c
to update the channel configuration more cleanly.

Signed-off-by: Samuel Mendoza-Jonas <sam@mendozajonas.com>
---
v2: Fixed ncsi_channel_is_tx() to consider the state of channels in other
packages.

 net/ncsi/internal.h     |  2 ++
 net/ncsi/ncsi-manage.c  | 57 +++++++++++++++++++++++++++++++++++++----
 net/ncsi/ncsi-netlink.c | 10 +++-----
 3 files changed, 58 insertions(+), 11 deletions(-)

Comments

Justin.Lee1@Dell.com Oct. 26, 2018, 5:25 p.m. UTC | #1
Hi Samuel,

I noticed a few issues and commented below.

Thanks,
Justin


>  /* Resources */
> +int ncsi_reset_dev(struct ncsi_dev *nd);
>  void ncsi_start_channel_monitor(struct ncsi_channel *nc);
>  void ncsi_stop_channel_monitor(struct ncsi_channel *nc);
>  struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np,
> diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
> index 014321ad31d3..9bad03e3fa5e 100644
> --- a/net/ncsi/ncsi-manage.c
> +++ b/net/ncsi/ncsi-manage.c
> @@ -550,8 +550,10 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
>  		spin_lock_irqsave(&nc->lock, flags);
>  		nc->state = NCSI_CHANNEL_INACTIVE;
>  		spin_unlock_irqrestore(&nc->lock, flags);
> -		ncsi_process_next_channel(ndp);
> -
> +		if (ndp->flags & NCSI_DEV_RESET)
> +			ncsi_reset_dev(nd);
> +		else
> +			ncsi_process_next_channel(ndp);
>  		break;
>  	default:
>  		netdev_warn(nd->dev, "Wrong NCSI state 0x%x in suspend\n",
> @@ -1554,7 +1556,7 @@ int ncsi_start_dev(struct ncsi_dev *nd)
>  		return 0;
>  	}
>  
> -	return ncsi_choose_active_channel(nd);
> +	return ncsi_reset_dev(nd);

If there is no available channel due to the whitelist, ncsi_start_dev() function will return failed
Status and the network interface may fail to bring up too. It is possible for user to disable all 
channels and leave the interface up for checking the LOM status.

>  }
>  EXPORT_SYMBOL_GPL(ncsi_start_dev);

Also, if I send set_package_mask and set_channel_mask commands back to back in a program,
the state machine doesn't work well. If I use command line and wait for it to complete for 
each step, then it is fine.

npcm7xx-emc f0825000.eth eth2: NCSI: Multi-package enabled on ifindex 2, mask 0x00000001
npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_stop_channel_monitor() - pkg 0 ch 0
npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_dev_work()
npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_suspend_channel() - pkg 0 ch 0 state 0400
npcm7xx-emc f0825000.eth eth2: NCSI: pkg 0 ch 0 set as preferred channel
npcm7xx-emc f0825000.eth eth2: NCSI: Multi-channel enabled on ifindex 2, mask 0x00000003
npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_stop_channel_monitor() - pkg 0 ch 1
npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_dev_work()
npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_suspend_channel() - pkg 0 ch 1 state 0400
npcm7xx-emc f0825000.eth eth2: NCSI: Package 1 set to all channels disabled
npcm7xx-emc f0825000.eth eth2: NCSI: Multi-channel enabled on ifindex 2, mask 0x00000000
npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel()
npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pkg 0
npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pass pkg whitelist
npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - ch 0
npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pass ch whitelist
npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - skip
npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - ch 1
npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pass ch whitelist
npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - skip
npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - next pkg
npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pkg 1
npcm7xx-emc f0825000.eth eth2: NCSI: No channel found to configure!
npcm7xx-emc f0825000.eth eth2: NCSI interface down
npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_dev_work()
npcm7xx-emc f0825000.eth eth2: Wrong NCSI state 0x100 in workqueue

All masks are set correctly, but you can see the PS column is not right and channel doesn't
configure correctly.

/sys/kernel/debug/ncsi_protocol# cat ncsi_device_status
IFIDX IFNAME NAME   PID CID RX TX MP MC WP WC PC PS LS RU CR NQ HA
===================================================================
  2   eth2   ncsi0  000 000 1  1  1  1  1  1  1  0  1  1  1  0  1
  2   eth2   ncsi1  000 001 1  0  1  1  1  1  0  0  1  1  1  0  1
  2   eth2   ncsi2  001 000 0  0  1  1  0  0  0  0  1  1  1  0  1
  2   eth2   ncsi3  001 001 0  0  1  1  0  0  0  0  1  1  1  0  1
===================================================================
MP: Multi-mode Package     WP: Whitelist Package
MC: Multi-mode Channel     WC: Whitelist Channel
PC: Primary Channel
PS: Poll Status
LS: Link Status
RU: Running
CR: Carrier OK
NQ: Queue Stopped
HA: Hardware Arbitration

PS column is getting from (int)nc->monitor.enabled.
Sam Mendoza-Jonas Oct. 30, 2018, 12:23 a.m. UTC | #2
On Fri, 2018-10-26 at 17:25 +0000, Justin.Lee1@Dell.com wrote:
> Hi Samuel,
> 
> I noticed a few issues and commented below.
> 
> Thanks,
> Justin
> 
> 
> >  /* Resources */
> > +int ncsi_reset_dev(struct ncsi_dev *nd);
> >  void ncsi_start_channel_monitor(struct ncsi_channel *nc);
> >  void ncsi_stop_channel_monitor(struct ncsi_channel *nc);
> >  struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np,
> > diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
> > index 014321ad31d3..9bad03e3fa5e 100644
> > --- a/net/ncsi/ncsi-manage.c
> > +++ b/net/ncsi/ncsi-manage.c
> > @@ -550,8 +550,10 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
> >  		spin_lock_irqsave(&nc->lock, flags);
> >  		nc->state = NCSI_CHANNEL_INACTIVE;
> >  		spin_unlock_irqrestore(&nc->lock, flags);
> > -		ncsi_process_next_channel(ndp);
> > -
> > +		if (ndp->flags & NCSI_DEV_RESET)
> > +			ncsi_reset_dev(nd);
> > +		else
> > +			ncsi_process_next_channel(ndp);
> >  		break;
> >  	default:
> >  		netdev_warn(nd->dev, "Wrong NCSI state 0x%x in suspend\n",
> > @@ -1554,7 +1556,7 @@ int ncsi_start_dev(struct ncsi_dev *nd)
> >  		return 0;
> >  	}
> >  
> > -	return ncsi_choose_active_channel(nd);
> > +	return ncsi_reset_dev(nd);
> 
> If there is no available channel due to the whitelist, ncsi_start_dev() function will return failed
> Status and the network interface may fail to bring up too. It is possible for user to disable all 
> channels and leave the interface up for checking the LOM status.
> 

I'm not sure that that is a bug, or at least not in the scope of this
series. If the whitelist is set such that no channels are valid then
there's nothing for NCSI to do. If we want to do something like always
monitor all channels then that would be best to do in another patch.

> >  }
> >  EXPORT_SYMBOL_GPL(ncsi_start_dev);
> 
> Also, if I send set_package_mask and set_channel_mask commands back to back in a program,
> the state machine doesn't work well. If I use command line and wait for it to complete for 
> each step, then it is fine.

Yeah that's not great; probably hitting some corner cases in the NCSI
locking. I'll look into the multi-channel related stuff but I have a
feeling that if you tried this with the existing set/clear commands you
would probably hit something similar, especially on your dual core
platform. If so this is probably something to fix separately.

> 
> npcm7xx-emc f0825000.eth eth2: NCSI: Multi-package enabled on ifindex 2, mask 0x00000001
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_stop_channel_monitor() - pkg 0 ch 0
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_dev_work()
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_suspend_channel() - pkg 0 ch 0 state 0400
> npcm7xx-emc f0825000.eth eth2: NCSI: pkg 0 ch 0 set as preferred channel
> npcm7xx-emc f0825000.eth eth2: NCSI: Multi-channel enabled on ifindex 2, mask 0x00000003
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_stop_channel_monitor() - pkg 0 ch 1
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_dev_work()
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_suspend_channel() - pkg 0 ch 1 state 0400
> npcm7xx-emc f0825000.eth eth2: NCSI: Package 1 set to all channels disabled
> npcm7xx-emc f0825000.eth eth2: NCSI: Multi-channel enabled on ifindex 2, mask 0x00000000
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel()
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pkg 0
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pass pkg whitelist
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - ch 0
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pass ch whitelist
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - skip
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - ch 1
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pass ch whitelist
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - skip
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - next pkg
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pkg 1
> npcm7xx-emc f0825000.eth eth2: NCSI: No channel found to configure!
> npcm7xx-emc f0825000.eth eth2: NCSI interface down
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_dev_work()
> npcm7xx-emc f0825000.eth eth2: Wrong NCSI state 0x100 in workqueue
> 
> All masks are set correctly, but you can see the PS column is not right and channel doesn't
> configure correctly.
> 
> /sys/kernel/debug/ncsi_protocol# cat ncsi_device_status
> IFIDX IFNAME NAME   PID CID RX TX MP MC WP WC PC PS LS RU CR NQ HA
> ===================================================================
>   2   eth2   ncsi0  000 000 1  1  1  1  1  1  1  0  1  1  1  0  1
>   2   eth2   ncsi1  000 001 1  0  1  1  1  1  0  0  1  1  1  0  1
>   2   eth2   ncsi2  001 000 0  0  1  1  0  0  0  0  1  1  1  0  1
>   2   eth2   ncsi3  001 001 0  0  1  1  0  0  0  0  1  1  1  0  1
> ===================================================================
> MP: Multi-mode Package     WP: Whitelist Package
> MC: Multi-mode Channel     WC: Whitelist Channel
> PC: Primary Channel
> PS: Poll Status
> LS: Link Status
> RU: Running
> CR: Carrier OK
> NQ: Queue Stopped
> HA: Hardware Arbitration
> 
> PS column is getting from (int)nc->monitor.enabled.
Justin.Lee1@Dell.com Oct. 30, 2018, 6:23 p.m. UTC | #3
> On Fri, 2018-10-26 at 17:25 +0000, Justin.Lee1@Dell.com wrote:
> > Hi Samuel,
> > 
> > I noticed a few issues and commented below.
> > 
> > Thanks,
> > Justin
> > 
> > 
> > >  /* Resources */
> > > +int ncsi_reset_dev(struct ncsi_dev *nd);
> > >  void ncsi_start_channel_monitor(struct ncsi_channel *nc);
> > >  void ncsi_stop_channel_monitor(struct ncsi_channel *nc);
> > >  struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np,
> > > diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
> > > index 014321ad31d3..9bad03e3fa5e 100644
> > > --- a/net/ncsi/ncsi-manage.c
> > > +++ b/net/ncsi/ncsi-manage.c
> > > @@ -550,8 +550,10 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
> > >  		spin_lock_irqsave(&nc->lock, flags);
> > >  		nc->state = NCSI_CHANNEL_INACTIVE;
> > >  		spin_unlock_irqrestore(&nc->lock, flags);
> > > -		ncsi_process_next_channel(ndp);
> > > -
> > > +		if (ndp->flags & NCSI_DEV_RESET)
> > > +			ncsi_reset_dev(nd);
> > > +		else
> > > +			ncsi_process_next_channel(ndp);
> > >  		break;
> > >  	default:
> > >  		netdev_warn(nd->dev, "Wrong NCSI state 0x%x in suspend\n",
> > > @@ -1554,7 +1556,7 @@ int ncsi_start_dev(struct ncsi_dev *nd)
> > >  		return 0;
> > >  	}
> > >  
> > > -	return ncsi_choose_active_channel(nd);
> > > +	return ncsi_reset_dev(nd);
> > 
> > If there is no available channel due to the whitelist, ncsi_start_dev() function will return failed
> > Status and the network interface may fail to bring up too. It is possible for user to disable all 
> > channels and leave the interface up for checking the LOM status.
> > 
> 
> I'm not sure that that is a bug, or at least not in the scope of this
> series. If the whitelist is set such that no channels are valid then
> there's nothing for NCSI to do. If we want to do something like always
> monitor all channels then that would be best to do in another patch.
> 
> > >  }
> > >  EXPORT_SYMBOL_GPL(ncsi_start_dev);
> > 
> > Also, if I send set_package_mask and set_channel_mask commands back to back in a program,
> > the state machine doesn't work well. If I use command line and wait for it to complete for 
> > each step, then it is fine.
> 
> Yeah that's not great; probably hitting some corner cases in the NCSI
> locking. I'll look into the multi-channel related stuff but I have a
> feeling that if you tried this with the existing set/clear commands you
> would probably hit something similar, especially on your dual core
> platform. If so this is probably something to fix separately.
> 

It is possible that it is causing by the following code in ncsi_reset_dev() function.
The state might be overwritten and the previous operation is interrupted.

	spin_lock_irqsave(&ndp->lock, flags);
	ndp->flags |= NCSI_DEV_RESET;
	ndp->active_channel = active;
	ndp->active_package = active->package;
	spin_unlock_irqrestore(&ndp->lock, flags);

	nd->state = ncsi_dev_state_suspend;

> > 
> > npcm7xx-emc f0825000.eth eth2: NCSI: Multi-package enabled on ifindex 2, mask 0x00000001
> > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_stop_channel_monitor() - pkg 0 ch 0
> > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_dev_work()
> > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_suspend_channel() - pkg 0 ch 0 state 0400
> > npcm7xx-emc f0825000.eth eth2: NCSI: pkg 0 ch 0 set as preferred channel
> > npcm7xx-emc f0825000.eth eth2: NCSI: Multi-channel enabled on ifindex 2, mask 0x00000003
> > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_stop_channel_monitor() - pkg 0 ch 1
> > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_dev_work()
> > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_suspend_channel() - pkg 0 ch 1 state 0400
> > npcm7xx-emc f0825000.eth eth2: NCSI: Package 1 set to all channels disabled
> > npcm7xx-emc f0825000.eth eth2: NCSI: Multi-channel enabled on ifindex 2, mask 0x00000000
> > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel()
> > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pkg 0
> > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pass pkg whitelist
> > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - ch 0
> > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pass ch whitelist
> > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - skip
> > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - ch 1
> > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pass ch whitelist
> > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - skip
> > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - next pkg
> > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pkg 1
> > npcm7xx-emc f0825000.eth eth2: NCSI: No channel found to configure!
> > npcm7xx-emc f0825000.eth eth2: NCSI interface down
> > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_dev_work()
> > npcm7xx-emc f0825000.eth eth2: Wrong NCSI state 0x100 in workqueue
> > 
> > All masks are set correctly, but you can see the PS column is not right and channel doesn't
> > configure correctly.
> > 
> > /sys/kernel/debug/ncsi_protocol# cat ncsi_device_status
> > IFIDX IFNAME NAME   PID CID RX TX MP MC WP WC PC PS LS RU CR NQ HA
> > ===================================================================
> >   2   eth2   ncsi0  000 000 1  1  1  1  1  1  1  0  1  1  1  0  1
> >   2   eth2   ncsi1  000 001 1  0  1  1  1  1  0  0  1  1  1  0  1
> >   2   eth2   ncsi2  001 000 0  0  1  1  0  0  0  0  1  1  1  0  1
> >   2   eth2   ncsi3  001 001 0  0  1  1  0  0  0  0  1  1  1  0  1
> > ===================================================================
> > MP: Multi-mode Package     WP: Whitelist Package
> > MC: Multi-mode Channel     WC: Whitelist Channel
> > PC: Primary Channel
> > PS: Poll Status
> > LS: Link Status
> > RU: Running
> > CR: Carrier OK
> > NQ: Queue Stopped
> > HA: Hardware Arbitration
> > 
> > PS column is getting from (int)nc->monitor.enabled.
Sam Mendoza-Jonas Nov. 1, 2018, 4:30 a.m. UTC | #4
On Tue, 2018-10-30 at 18:23 +0000, Justin.Lee1@Dell.com wrote:
> > On Fri, 2018-10-26 at 17:25 +0000, Justin.Lee1@Dell.com wrote:
> > > Hi Samuel,
> > > 
> > > I noticed a few issues and commented below.
> > > 
> > > Thanks,
> > > Justin
> > > 
> > > 
> > > >  /* Resources */
> > > > +int ncsi_reset_dev(struct ncsi_dev *nd);
> > > >  void ncsi_start_channel_monitor(struct ncsi_channel *nc);
> > > >  void ncsi_stop_channel_monitor(struct ncsi_channel *nc);
> > > >  struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np,
> > > > diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
> > > > index 014321ad31d3..9bad03e3fa5e 100644
> > > > --- a/net/ncsi/ncsi-manage.c
> > > > +++ b/net/ncsi/ncsi-manage.c
> > > > @@ -550,8 +550,10 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
> > > >  		spin_lock_irqsave(&nc->lock, flags);
> > > >  		nc->state = NCSI_CHANNEL_INACTIVE;
> > > >  		spin_unlock_irqrestore(&nc->lock, flags);
> > > > -		ncsi_process_next_channel(ndp);
> > > > -
> > > > +		if (ndp->flags & NCSI_DEV_RESET)
> > > > +			ncsi_reset_dev(nd);
> > > > +		else
> > > > +			ncsi_process_next_channel(ndp);
> > > >  		break;
> > > >  	default:
> > > >  		netdev_warn(nd->dev, "Wrong NCSI state 0x%x in suspend\n",
> > > > @@ -1554,7 +1556,7 @@ int ncsi_start_dev(struct ncsi_dev *nd)
> > > >  		return 0;
> > > >  	}
> > > >  
> > > > -	return ncsi_choose_active_channel(nd);
> > > > +	return ncsi_reset_dev(nd);
> > > 
> > > If there is no available channel due to the whitelist, ncsi_start_dev() function will return failed
> > > Status and the network interface may fail to bring up too. It is possible for user to disable all 
> > > channels and leave the interface up for checking the LOM status.
> > > 
> > 
> > I'm not sure that that is a bug, or at least not in the scope of this
> > series. If the whitelist is set such that no channels are valid then
> > there's nothing for NCSI to do. If we want to do something like always
> > monitor all channels then that would be best to do in another patch.
> > 
> > > >  }
> > > >  EXPORT_SYMBOL_GPL(ncsi_start_dev);
> > > 
> > > Also, if I send set_package_mask and set_channel_mask commands back to back in a program,
> > > the state machine doesn't work well. If I use command line and wait for it to complete for 
> > > each step, then it is fine.
> > 
> > Yeah that's not great; probably hitting some corner cases in the NCSI
> > locking. I'll look into the multi-channel related stuff but I have a
> > feeling that if you tried this with the existing set/clear commands you
> > would probably hit something similar, especially on your dual core
> > platform. If so this is probably something to fix separately.
> > 
> 
> It is possible that it is causing by the following code in ncsi_reset_dev() function.
> The state might be overwritten and the previous operation is interrupted.
> 
> 	spin_lock_irqsave(&ndp->lock, flags);
> 	ndp->flags |= NCSI_DEV_RESET;
> 	ndp->active_channel = active;
> 	ndp->active_package = active->package;
> 	spin_unlock_irqrestore(&ndp->lock, flags);
> 
> 	nd->state = ncsi_dev_state_suspend;

Yep, we should probably add a check before calling ncsi_reset_dev() in
the netlink code if we're already in reset, and check in ncsi_reset_dev()
if we mid-configuration.

For your trace below can you share exactly which commands you were
sending? Those messages aren't upstream so it's not 100% clear what's
being sent.

Thanks!
Sam

> 
> > > npcm7xx-emc f0825000.eth eth2: NCSI: Multi-package enabled on ifindex 2, mask 0x00000001
> > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_stop_channel_monitor() - pkg 0 ch 0
> > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_dev_work()
> > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_suspend_channel() - pkg 0 ch 0 state 0400
> > > npcm7xx-emc f0825000.eth eth2: NCSI: pkg 0 ch 0 set as preferred channel
> > > npcm7xx-emc f0825000.eth eth2: NCSI: Multi-channel enabled on ifindex 2, mask 0x00000003
> > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_stop_channel_monitor() - pkg 0 ch 1
> > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_dev_work()
> > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_suspend_channel() - pkg 0 ch 1 state 0400
> > > npcm7xx-emc f0825000.eth eth2: NCSI: Package 1 set to all channels disabled
> > > npcm7xx-emc f0825000.eth eth2: NCSI: Multi-channel enabled on ifindex 2, mask 0x00000000
> > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel()
> > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pkg 0
> > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pass pkg whitelist
> > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - ch 0
> > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pass ch whitelist
> > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - skip
> > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - ch 1
> > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pass ch whitelist
> > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - skip
> > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - next pkg
> > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pkg 1
> > > npcm7xx-emc f0825000.eth eth2: NCSI: No channel found to configure!
> > > npcm7xx-emc f0825000.eth eth2: NCSI interface down
> > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_dev_work()
> > > npcm7xx-emc f0825000.eth eth2: Wrong NCSI state 0x100 in workqueue
> > > 
> > > All masks are set correctly, but you can see the PS column is not right and channel doesn't
> > > configure correctly.
> > > 
> > > /sys/kernel/debug/ncsi_protocol# cat ncsi_device_status
> > > IFIDX IFNAME NAME   PID CID RX TX MP MC WP WC PC PS LS RU CR NQ HA
> > > ===================================================================
> > >   2   eth2   ncsi0  000 000 1  1  1  1  1  1  1  0  1  1  1  0  1
> > >   2   eth2   ncsi1  000 001 1  0  1  1  1  1  0  0  1  1  1  0  1
> > >   2   eth2   ncsi2  001 000 0  0  1  1  0  0  0  0  1  1  1  0  1
> > >   2   eth2   ncsi3  001 001 0  0  1  1  0  0  0  0  1  1  1  0  1
> > > ===================================================================
> > > MP: Multi-mode Package     WP: Whitelist Package
> > > MC: Multi-mode Channel     WC: Whitelist Channel
> > > PC: Primary Channel
> > > PS: Poll Status
> > > LS: Link Status
> > > RU: Running
> > > CR: Carrier OK
> > > NQ: Queue Stopped
> > > HA: Hardware Arbitration
> > > 
> > > PS column is getting from (int)nc->monitor.enabled.
> 
>
Justin.Lee1@Dell.com Nov. 1, 2018, 3:51 p.m. UTC | #5
> For your trace below can you share exactly which commands you were
> sending? Those messages aren't upstream so it's not 100% clear what's
> being sent.
> 
> Thanks!
> Sam
> 

It is sending via netlink directly. I have two packages (0 and 1) and each package has
two channels (0 and 1) on my system. If I convert it to command line, it would be
as below.

/home/root# ncsi_netlink -l 2 -m -a 0x01
Set cmd - ifindex 2, multi 1, mask 0x00000001

/home/root# ncsi_netlink -l 2 -m -b 0x03 -p 0
Set cmd - ifindex 2, package 0, channel -1, multi 1, mask 0x00000003

/home/root# ncsi_netlink -l 2 -m -b 0x00 -p 1
Set cmd - ifindex 2, package 1, channel -1, multi 1, mask 0x00000000

/home/root# ncsi_netlink
Usage: ncsi_netlink COMMAND [OPTION]...
Send messages to the NCSI driver via Netlink (0.4)

Mandatory arguments to long options are mandatory for short options too.
COMMAND:
  -i, --info                 Display info for packages and channels
  -s, --set                  Force the usage of a certain package/channel combination
  -x, --clear                Clear the above setting
  -a, --package-mask MASK    Set package selection mask
  -b, --channel-mask MASK    Set channel selection mask
  -o, --cmd DATA0 [DATA1]... Send NC-SI command
  -d, --discovery            Request to discover packages and channels
  -k, --lookup               Look up channel id or name
  -u, --status               Display status
  -h, --help                 Print this help text

OPTION:
  -l, --ifindex INDEX        Specify the interface index
  -p, --package PACKAGE      Package number
  -c, --channel CHANNEL      Channel number (aka. port number)
  -n, --channel NAME         Channel name (aka. ncsiX)
  -m, --multi-mode           Set multi-mode

Example: ncsi_netlink -l 2 --info


> > 
> > > > npcm7xx-emc f0825000.eth eth2: NCSI: Multi-package enabled on ifindex 2, mask 0x00000001
> > > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_stop_channel_monitor() - pkg 0 ch 0
> > > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_dev_work()
> > > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_suspend_channel() - pkg 0 ch 0 state 0400
> > > > npcm7xx-emc f0825000.eth eth2: NCSI: pkg 0 ch 0 set as preferred channel
> > > > npcm7xx-emc f0825000.eth eth2: NCSI: Multi-channel enabled on ifindex 2, mask 0x00000003
> > > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_stop_channel_monitor() - pkg 0 ch 1
> > > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_dev_work()
> > > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_suspend_channel() - pkg 0 ch 1 state 0400
> > > > npcm7xx-emc f0825000.eth eth2: NCSI: Package 1 set to all channels disabled
> > > > npcm7xx-emc f0825000.eth eth2: NCSI: Multi-channel enabled on ifindex 2, mask 0x00000000
> > > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel()
> > > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pkg 0
> > > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pass pkg whitelist
> > > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - ch 0
> > > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pass ch whitelist
> > > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - skip
> > > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - ch 1
> > > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pass ch whitelist
> > > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - skip
> > > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - next pkg
> > > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_choose_active_channel() - pkg 1
> > > > npcm7xx-emc f0825000.eth eth2: NCSI: No channel found to configure!
> > > > npcm7xx-emc f0825000.eth eth2: NCSI interface down
> > > > npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_dev_work()
> > > > npcm7xx-emc f0825000.eth eth2: Wrong NCSI state 0x100 in workqueue
> > > > 
> > > > All masks are set correctly, but you can see the PS column is not right and channel doesn't
> > > > configure correctly.
> > > > 
> > > > /sys/kernel/debug/ncsi_protocol# cat ncsi_device_status
> > > > IFIDX IFNAME NAME   PID CID RX TX MP MC WP WC PC PS LS RU CR NQ HA
> > > > ===================================================================
> > > >   2   eth2   ncsi0  000 000 1  1  1  1  1  1  1  0  1  1  1  0  1
> > > >   2   eth2   ncsi1  000 001 1  0  1  1  1  1  0  0  1  1  1  0  1
> > > >   2   eth2   ncsi2  001 000 0  0  1  1  0  0  0  0  1  1  1  0  1
> > > >   2   eth2   ncsi3  001 001 0  0  1  1  0  0  0  0  1  1  1  0  1
> > > > ===================================================================
> > > > MP: Multi-mode Package     WP: Whitelist Package
> > > > MC: Multi-mode Channel     WC: Whitelist Channel
> > > > PC: Primary Channel
> > > > PS: Poll Status
> > > > LS: Link Status
> > > > RU: Running
> > > > CR: Carrier OK
> > > > NQ: Queue Stopped
> > > > HA: Hardware Arbitration
> > > > 
> > > > PS column is getting from (int)nc->monitor.enabled.
> 
>
diff mbox series

Patch

diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index ec65778c41f3..bda51cb179fe 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -287,6 +287,7 @@  struct ncsi_dev_priv {
 #define NCSI_DEV_PROBED		1            /* Finalized NCSI topology    */
 #define NCSI_DEV_HWA		2            /* Enabled HW arbitration     */
 #define NCSI_DEV_RESHUFFLE	4
+#define NCSI_DEV_RESET		8            /* Reset state of NC          */
 	unsigned int        gma_flag;        /* OEM GMA flag               */
 	spinlock_t          lock;            /* Protect the NCSI device    */
 #if IS_ENABLED(CONFIG_IPV6)
@@ -342,6 +343,7 @@  extern spinlock_t ncsi_dev_lock;
 	list_for_each_entry_rcu(nc, &np->channels, node)
 
 /* Resources */
+int ncsi_reset_dev(struct ncsi_dev *nd);
 void ncsi_start_channel_monitor(struct ncsi_channel *nc);
 void ncsi_stop_channel_monitor(struct ncsi_channel *nc);
 struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np,
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 014321ad31d3..9bad03e3fa5e 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -550,8 +550,10 @@  static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
 		spin_lock_irqsave(&nc->lock, flags);
 		nc->state = NCSI_CHANNEL_INACTIVE;
 		spin_unlock_irqrestore(&nc->lock, flags);
-		ncsi_process_next_channel(ndp);
-
+		if (ndp->flags & NCSI_DEV_RESET)
+			ncsi_reset_dev(nd);
+		else
+			ncsi_process_next_channel(ndp);
 		break;
 	default:
 		netdev_warn(nd->dev, "Wrong NCSI state 0x%x in suspend\n",
@@ -1554,7 +1556,7 @@  int ncsi_start_dev(struct ncsi_dev *nd)
 		return 0;
 	}
 
-	return ncsi_choose_active_channel(nd);
+	return ncsi_reset_dev(nd);
 }
 EXPORT_SYMBOL_GPL(ncsi_start_dev);
 
@@ -1567,7 +1569,10 @@  void ncsi_stop_dev(struct ncsi_dev *nd)
 	int old_state;
 	unsigned long flags;
 
-	/* Stop the channel monitor and reset channel's state */
+	/* Stop the channel monitor on any active channels. Don't reset the
+	 * channel state so we know which were active when ncsi_start_dev()
+	 * is next called.
+	 */
 	NCSI_FOR_EACH_PACKAGE(ndp, np) {
 		NCSI_FOR_EACH_CHANNEL(np, nc) {
 			ncsi_stop_channel_monitor(nc);
@@ -1575,7 +1580,6 @@  void ncsi_stop_dev(struct ncsi_dev *nd)
 			spin_lock_irqsave(&nc->lock, flags);
 			chained = !list_empty(&nc->link);
 			old_state = nc->state;
-			nc->state = NCSI_CHANNEL_INACTIVE;
 			spin_unlock_irqrestore(&nc->lock, flags);
 
 			WARN_ON_ONCE(chained ||
@@ -1588,6 +1592,49 @@  void ncsi_stop_dev(struct ncsi_dev *nd)
 }
 EXPORT_SYMBOL_GPL(ncsi_stop_dev);
 
+int ncsi_reset_dev(struct ncsi_dev *nd)
+{
+	struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
+	struct ncsi_channel *nc, *active;
+	struct ncsi_package *np;
+	unsigned long flags;
+
+	active = NULL;
+	NCSI_FOR_EACH_PACKAGE(ndp, np) {
+		NCSI_FOR_EACH_CHANNEL(np, nc) {
+			spin_lock_irqsave(&nc->lock, flags);
+
+			if (nc->state == NCSI_CHANNEL_ACTIVE) {
+				active = nc;
+				nc->state = NCSI_CHANNEL_INVISIBLE;
+				spin_unlock_irqrestore(&nc->lock, flags);
+				ncsi_stop_channel_monitor(nc);
+				break;
+			}
+
+			spin_unlock_irqrestore(&nc->lock, flags);
+		}
+	}
+
+	if (!active) {
+		/* Done */
+		spin_lock_irqsave(&ndp->lock, flags);
+		ndp->flags &= ~NCSI_DEV_RESET;
+		spin_unlock_irqrestore(&ndp->lock, flags);
+		return ncsi_choose_active_channel(ndp);
+	}
+
+	spin_lock_irqsave(&ndp->lock, flags);
+	ndp->flags |= NCSI_DEV_RESET;
+	ndp->active_channel = active;
+	ndp->active_package = active->package;
+	spin_unlock_irqrestore(&ndp->lock, flags);
+
+	nd->state = ncsi_dev_state_suspend;
+	schedule_work(&ndp->work);
+	return 0;
+}
+
 void ncsi_unregister_dev(struct ncsi_dev *nd)
 {
 	struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index 33314381b4f5..06bb8bc2c798 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -330,9 +330,8 @@  static int ncsi_set_interface_nl(struct sk_buff *msg, struct genl_info *info)
 		    package_id, channel_id,
 		    channel_id == NCSI_RESERVED_CHANNEL ? " (any)" : "");
 
-	/* Bounce the NCSI channel to set changes */
-	ncsi_stop_dev(&ndp->ndev);
-	ncsi_start_dev(&ndp->ndev);
+	/* Update channel configuration */
+	ncsi_reset_dev(&ndp->ndev);
 
 	return 0;
 }
@@ -360,9 +359,8 @@  static int ncsi_clear_interface_nl(struct sk_buff *msg, struct genl_info *info)
 	spin_unlock_irqrestore(&ndp->lock, flags);
 	netdev_info(ndp->ndev.dev, "NCSI: Cleared preferred package/channel\n");
 
-	/* Bounce the NCSI channel to set changes */
-	ncsi_stop_dev(&ndp->ndev);
-	ncsi_start_dev(&ndp->ndev);
+	/* Update channel configuration */
+	ncsi_reset_dev(&ndp->ndev);
 
 	return 0;
 }