Message ID | 56328a74e135e8c904af4aa3f73e0fe2843b7500.1490903650.git.joseph.salisbury@canonical.com |
---|---|
State | New |
Headers | show |
On Thu, Mar 30, 2017 at 04:35:59PM -0400, Joseph Salisbury wrote: > From: Jack Morgenstein <jackm@dev.mellanox.co.il> > > BugLink: http://bugs.launchpad.net/bugs/1672785 > > Some Hypervisors detach VFs from VMs by instantly causing an FLR event > to be generated for a VF. > > In the mlx4 case, this will cause that VF's comm channel to be disabled > before the VM has an opportunity to invoke the VF device's "shutdown" > method. > > For such Hypervisors, there is a race condition between the VF's > shutdown method and its internal-error detection/reset thread. > > The internal-error detection/reset thread (which runs every 5 seconds) also > detects a disabled comm channel. If the internal-error detection/reset > flow wins the race, we still get delays (while that flow tries repeatedly > to detect comm-channel recovery). > > The cited commit fixed the command timeout problem when the > internal-error detection/reset flow loses the race. > > This commit avoids the unneeded delays when the internal-error > detection/reset flow wins. > > Fixes: d585df1c5ccf ("net/mlx4_core: Avoid command timeouts during VF driver device shutdown") > Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il> > Reported-by: Simon Xiao <sixiao@microsoft.com> > Signed-off-by: Tariq Toukan <tariqt@mellanox.com> > Signed-off-by: David S. Miller <davem@davemloft.net> > (backported from commit 4cbe4dac82e423ecc9a0ba46af24a860853259f4) > Signed-off-by: Joseph Salisbury <joseph.salisbury@canonical.com> > --- > drivers/net/ethernet/mellanox/mlx4/cmd.c | 11 +++++++++++ > drivers/net/ethernet/mellanox/mlx4/main.c | 11 +++++++++++ > include/linux/mlx4/device.h | 1 + > 3 files changed, 23 insertions(+) > > diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c > index f04a423..be6906b 100644 > --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c > +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c > @@ -2278,6 +2278,17 @@ static int sync_toggles(struct mlx4_dev *dev) > rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)); > if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) { > /* PCI might be offline */ > + > + /* If device removal has been requested, > + * do not continue retrying. > + */ > + if (dev->persist->interface_state & > + MLX4_INTERFACE_STATE_NOWAIT) { > + mlx4_warn(dev, > + "communication channel is offline\n"); > + return -EIO; > + } > + > msleep(100); > wr_toggle = swab32(readl(&priv->mfunc.comm-> > slave_write)); > diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c > index 7183ac4..ea5e362 100644 > --- a/drivers/net/ethernet/mellanox/mlx4/main.c > +++ b/drivers/net/ethernet/mellanox/mlx4/main.c > @@ -1916,6 +1916,14 @@ static int mlx4_comm_check_offline(struct mlx4_dev *dev) > (u32)(1 << COMM_CHAN_OFFLINE_OFFSET)); > if (!offline_bit) > return 0; > + > + /* If device removal has been requested, > + * do not continue retrying. > + */ > + if (dev->persist->interface_state & > + MLX4_INTERFACE_STATE_NOWAIT) > + break; > + > /* There are cases as part of AER/Reset flow that PF needs > * around 100 msec to load. We therefore sleep for 100 msec > * to allow other tasks to make use of that CPU during this > @@ -3930,6 +3938,9 @@ static void mlx4_remove_one(struct pci_dev *pdev) > struct devlink *devlink = priv_to_devlink(priv); > int active_vfs = 0; > > + if (mlx4_is_slave(dev)) > + persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT; > + > mutex_lock(&persist->interface_state_mutex); > persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; > mutex_unlock(&persist->interface_state_mutex); > diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h > index 42da355..d49f11d 100644 > --- a/include/linux/mlx4/device.h > +++ b/include/linux/mlx4/device.h > @@ -468,6 +468,7 @@ enum { > MLX4_INTERFACE_STATE_UP = 1 << 0, > MLX4_INTERFACE_STATE_DELETION = 1 << 1, > MLX4_INTERFACE_STATE_SHUTDOWN = 1 << 2, > + MLX4_INTERFACE_STATE_NOWAIT = 1 << 2, So, NOWAIT and SHUTDOWN are defined to the same value, so it might be necessary to review it that is going to work as expected. Maybe doing the shutdown removal is a path to consider (commit b4353708f5a1c084fd73f1b6fd243b142157b173). Cascardo. > }; > > #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ > -- > 2.7.4 > > > -- > kernel-team mailing list > kernel-team@lists.ubuntu.com > https://lists.ubuntu.com/mailman/listinfo/kernel-team
We're engaged to test the driver in the intended scenario and code path. We have a stable repro for the bug that this fixes. Thanks, --jrp On Mon, Apr 3, 2017 at 2:26 AM, Thadeu Lima de Souza Cascardo <cascardo@canonical.com> wrote: > On Thu, Mar 30, 2017 at 04:35:59PM -0400, Joseph Salisbury wrote: >> From: Jack Morgenstein <jackm@dev.mellanox.co.il> >> >> BugLink: http://bugs.launchpad.net/bugs/1672785 >> >> Some Hypervisors detach VFs from VMs by instantly causing an FLR event >> to be generated for a VF. >> >> In the mlx4 case, this will cause that VF's comm channel to be disabled >> before the VM has an opportunity to invoke the VF device's "shutdown" >> method. >> >> For such Hypervisors, there is a race condition between the VF's >> shutdown method and its internal-error detection/reset thread. >> >> The internal-error detection/reset thread (which runs every 5 seconds) also >> detects a disabled comm channel. If the internal-error detection/reset >> flow wins the race, we still get delays (while that flow tries repeatedly >> to detect comm-channel recovery). >> >> The cited commit fixed the command timeout problem when the >> internal-error detection/reset flow loses the race. >> >> This commit avoids the unneeded delays when the internal-error >> detection/reset flow wins. >> >> Fixes: d585df1c5ccf ("net/mlx4_core: Avoid command timeouts during VF driver device shutdown") >> Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il> >> Reported-by: Simon Xiao <sixiao@microsoft.com> >> Signed-off-by: Tariq Toukan <tariqt@mellanox.com> >> Signed-off-by: David S. Miller <davem@davemloft.net> >> (backported from commit 4cbe4dac82e423ecc9a0ba46af24a860853259f4) >> Signed-off-by: Joseph Salisbury <joseph.salisbury@canonical.com> >> --- >> drivers/net/ethernet/mellanox/mlx4/cmd.c | 11 +++++++++++ >> drivers/net/ethernet/mellanox/mlx4/main.c | 11 +++++++++++ >> include/linux/mlx4/device.h | 1 + >> 3 files changed, 23 insertions(+) >> >> diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c >> index f04a423..be6906b 100644 >> --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c >> +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c >> @@ -2278,6 +2278,17 @@ static int sync_toggles(struct mlx4_dev *dev) >> rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)); >> if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) { >> /* PCI might be offline */ >> + >> + /* If device removal has been requested, >> + * do not continue retrying. >> + */ >> + if (dev->persist->interface_state & >> + MLX4_INTERFACE_STATE_NOWAIT) { >> + mlx4_warn(dev, >> + "communication channel is offline\n"); >> + return -EIO; >> + } >> + >> msleep(100); >> wr_toggle = swab32(readl(&priv->mfunc.comm-> >> slave_write)); >> diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c >> index 7183ac4..ea5e362 100644 >> --- a/drivers/net/ethernet/mellanox/mlx4/main.c >> +++ b/drivers/net/ethernet/mellanox/mlx4/main.c >> @@ -1916,6 +1916,14 @@ static int mlx4_comm_check_offline(struct mlx4_dev *dev) >> (u32)(1 << COMM_CHAN_OFFLINE_OFFSET)); >> if (!offline_bit) >> return 0; >> + >> + /* If device removal has been requested, >> + * do not continue retrying. >> + */ >> + if (dev->persist->interface_state & >> + MLX4_INTERFACE_STATE_NOWAIT) >> + break; >> + >> /* There are cases as part of AER/Reset flow that PF needs >> * around 100 msec to load. We therefore sleep for 100 msec >> * to allow other tasks to make use of that CPU during this >> @@ -3930,6 +3938,9 @@ static void mlx4_remove_one(struct pci_dev *pdev) >> struct devlink *devlink = priv_to_devlink(priv); >> int active_vfs = 0; >> >> + if (mlx4_is_slave(dev)) >> + persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT; >> + >> mutex_lock(&persist->interface_state_mutex); >> persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; >> mutex_unlock(&persist->interface_state_mutex); >> diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h >> index 42da355..d49f11d 100644 >> --- a/include/linux/mlx4/device.h >> +++ b/include/linux/mlx4/device.h >> @@ -468,6 +468,7 @@ enum { >> MLX4_INTERFACE_STATE_UP = 1 << 0, >> MLX4_INTERFACE_STATE_DELETION = 1 << 1, >> MLX4_INTERFACE_STATE_SHUTDOWN = 1 << 2, >> + MLX4_INTERFACE_STATE_NOWAIT = 1 << 2, > > So, NOWAIT and SHUTDOWN are defined to the same value, so it might be > necessary to review it that is going to work as expected. Maybe doing > the shutdown removal is a path to consider (commit > b4353708f5a1c084fd73f1b6fd243b142157b173). > > Cascardo. > >> }; >> >> #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ >> -- >> 2.7.4 >> >> >> -- >> kernel-team mailing list >> kernel-team@lists.ubuntu.com >> https://lists.ubuntu.com/mailman/listinfo/kernel-team > > -- > kernel-team mailing list > kernel-team@lists.ubuntu.com > https://lists.ubuntu.com/mailman/listinfo/kernel-team
On Mon, Apr 03, 2017 at 05:11:34AM -0700, Joshua R. Poulson wrote: > We're engaged to test the driver in the intended scenario and code > path. We have a stable repro for the bug that this fixes. > > Thanks, --jrp > Is that on Yakkety, with this Yakkety backport as is? I am more concerned about the paths that would use SHUTDOWN, have you tested these as well? Thanks. Cascardo. > On Mon, Apr 3, 2017 at 2:26 AM, Thadeu Lima de Souza Cascardo > <cascardo@canonical.com> wrote: > > On Thu, Mar 30, 2017 at 04:35:59PM -0400, Joseph Salisbury wrote: > >> From: Jack Morgenstein <jackm@dev.mellanox.co.il> > >> > >> BugLink: http://bugs.launchpad.net/bugs/1672785 > >> > >> Some Hypervisors detach VFs from VMs by instantly causing an FLR event > >> to be generated for a VF. > >> > >> In the mlx4 case, this will cause that VF's comm channel to be disabled > >> before the VM has an opportunity to invoke the VF device's "shutdown" > >> method. > >> > >> For such Hypervisors, there is a race condition between the VF's > >> shutdown method and its internal-error detection/reset thread. > >> > >> The internal-error detection/reset thread (which runs every 5 seconds) also > >> detects a disabled comm channel. If the internal-error detection/reset > >> flow wins the race, we still get delays (while that flow tries repeatedly > >> to detect comm-channel recovery). > >> > >> The cited commit fixed the command timeout problem when the > >> internal-error detection/reset flow loses the race. > >> > >> This commit avoids the unneeded delays when the internal-error > >> detection/reset flow wins. > >> > >> Fixes: d585df1c5ccf ("net/mlx4_core: Avoid command timeouts during VF driver device shutdown") > >> Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il> > >> Reported-by: Simon Xiao <sixiao@microsoft.com> > >> Signed-off-by: Tariq Toukan <tariqt@mellanox.com> > >> Signed-off-by: David S. Miller <davem@davemloft.net> > >> (backported from commit 4cbe4dac82e423ecc9a0ba46af24a860853259f4) > >> Signed-off-by: Joseph Salisbury <joseph.salisbury@canonical.com> > >> --- > >> drivers/net/ethernet/mellanox/mlx4/cmd.c | 11 +++++++++++ > >> drivers/net/ethernet/mellanox/mlx4/main.c | 11 +++++++++++ > >> include/linux/mlx4/device.h | 1 + > >> 3 files changed, 23 insertions(+) > >> > >> diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c > >> index f04a423..be6906b 100644 > >> --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c > >> +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c > >> @@ -2278,6 +2278,17 @@ static int sync_toggles(struct mlx4_dev *dev) > >> rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)); > >> if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) { > >> /* PCI might be offline */ > >> + > >> + /* If device removal has been requested, > >> + * do not continue retrying. > >> + */ > >> + if (dev->persist->interface_state & > >> + MLX4_INTERFACE_STATE_NOWAIT) { > >> + mlx4_warn(dev, > >> + "communication channel is offline\n"); > >> + return -EIO; > >> + } > >> + > >> msleep(100); > >> wr_toggle = swab32(readl(&priv->mfunc.comm-> > >> slave_write)); > >> diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c > >> index 7183ac4..ea5e362 100644 > >> --- a/drivers/net/ethernet/mellanox/mlx4/main.c > >> +++ b/drivers/net/ethernet/mellanox/mlx4/main.c > >> @@ -1916,6 +1916,14 @@ static int mlx4_comm_check_offline(struct mlx4_dev *dev) > >> (u32)(1 << COMM_CHAN_OFFLINE_OFFSET)); > >> if (!offline_bit) > >> return 0; > >> + > >> + /* If device removal has been requested, > >> + * do not continue retrying. > >> + */ > >> + if (dev->persist->interface_state & > >> + MLX4_INTERFACE_STATE_NOWAIT) > >> + break; > >> + > >> /* There are cases as part of AER/Reset flow that PF needs > >> * around 100 msec to load. We therefore sleep for 100 msec > >> * to allow other tasks to make use of that CPU during this > >> @@ -3930,6 +3938,9 @@ static void mlx4_remove_one(struct pci_dev *pdev) > >> struct devlink *devlink = priv_to_devlink(priv); > >> int active_vfs = 0; > >> > >> + if (mlx4_is_slave(dev)) > >> + persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT; > >> + > >> mutex_lock(&persist->interface_state_mutex); > >> persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; > >> mutex_unlock(&persist->interface_state_mutex); > >> diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h > >> index 42da355..d49f11d 100644 > >> --- a/include/linux/mlx4/device.h > >> +++ b/include/linux/mlx4/device.h > >> @@ -468,6 +468,7 @@ enum { > >> MLX4_INTERFACE_STATE_UP = 1 << 0, > >> MLX4_INTERFACE_STATE_DELETION = 1 << 1, > >> MLX4_INTERFACE_STATE_SHUTDOWN = 1 << 2, > >> + MLX4_INTERFACE_STATE_NOWAIT = 1 << 2, > > > > So, NOWAIT and SHUTDOWN are defined to the same value, so it might be > > necessary to review it that is going to work as expected. Maybe doing > > the shutdown removal is a path to consider (commit > > b4353708f5a1c084fd73f1b6fd243b142157b173). > > > > Cascardo. > > > >> }; > >> > >> #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ > >> -- > >> 2.7.4 > >> > >> > >> -- > >> kernel-team mailing list > >> kernel-team@lists.ubuntu.com > >> https://lists.ubuntu.com/mailman/listinfo/kernel-team > > > > -- > > kernel-team mailing list > > kernel-team@lists.ubuntu.com > > https://lists.ubuntu.com/mailman/listinfo/kernel-team
Our friends at Mellanox have, we have been testing VF remove and add as part of SR-IOV in Azure testing. Our regression tests include on-premises Hyper-V as well. Thanks --jrp On Mon, Apr 3, 2017 at 6:32 AM, Thadeu Lima de Souza Cascardo <cascardo@canonical.com> wrote: > On Mon, Apr 03, 2017 at 05:11:34AM -0700, Joshua R. Poulson wrote: >> We're engaged to test the driver in the intended scenario and code >> path. We have a stable repro for the bug that this fixes. >> >> Thanks, --jrp >> > > Is that on Yakkety, with this Yakkety backport as is? > > I am more concerned about the paths that would use SHUTDOWN, have you tested > these as well? > > Thanks. > Cascardo. > >> On Mon, Apr 3, 2017 at 2:26 AM, Thadeu Lima de Souza Cascardo >> <cascardo@canonical.com> wrote: >> > On Thu, Mar 30, 2017 at 04:35:59PM -0400, Joseph Salisbury wrote: >> >> From: Jack Morgenstein <jackm@dev.mellanox.co.il> >> >> >> >> BugLink: http://bugs.launchpad.net/bugs/1672785 >> >> >> >> Some Hypervisors detach VFs from VMs by instantly causing an FLR event >> >> to be generated for a VF. >> >> >> >> In the mlx4 case, this will cause that VF's comm channel to be disabled >> >> before the VM has an opportunity to invoke the VF device's "shutdown" >> >> method. >> >> >> >> For such Hypervisors, there is a race condition between the VF's >> >> shutdown method and its internal-error detection/reset thread. >> >> >> >> The internal-error detection/reset thread (which runs every 5 seconds) also >> >> detects a disabled comm channel. If the internal-error detection/reset >> >> flow wins the race, we still get delays (while that flow tries repeatedly >> >> to detect comm-channel recovery). >> >> >> >> The cited commit fixed the command timeout problem when the >> >> internal-error detection/reset flow loses the race. >> >> >> >> This commit avoids the unneeded delays when the internal-error >> >> detection/reset flow wins. >> >> >> >> Fixes: d585df1c5ccf ("net/mlx4_core: Avoid command timeouts during VF driver device shutdown") >> >> Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il> >> >> Reported-by: Simon Xiao <sixiao@microsoft.com> >> >> Signed-off-by: Tariq Toukan <tariqt@mellanox.com> >> >> Signed-off-by: David S. Miller <davem@davemloft.net> >> >> (backported from commit 4cbe4dac82e423ecc9a0ba46af24a860853259f4) >> >> Signed-off-by: Joseph Salisbury <joseph.salisbury@canonical.com> >> >> --- >> >> drivers/net/ethernet/mellanox/mlx4/cmd.c | 11 +++++++++++ >> >> drivers/net/ethernet/mellanox/mlx4/main.c | 11 +++++++++++ >> >> include/linux/mlx4/device.h | 1 + >> >> 3 files changed, 23 insertions(+) >> >> >> >> diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c >> >> index f04a423..be6906b 100644 >> >> --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c >> >> +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c >> >> @@ -2278,6 +2278,17 @@ static int sync_toggles(struct mlx4_dev *dev) >> >> rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)); >> >> if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) { >> >> /* PCI might be offline */ >> >> + >> >> + /* If device removal has been requested, >> >> + * do not continue retrying. >> >> + */ >> >> + if (dev->persist->interface_state & >> >> + MLX4_INTERFACE_STATE_NOWAIT) { >> >> + mlx4_warn(dev, >> >> + "communication channel is offline\n"); >> >> + return -EIO; >> >> + } >> >> + >> >> msleep(100); >> >> wr_toggle = swab32(readl(&priv->mfunc.comm-> >> >> slave_write)); >> >> diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c >> >> index 7183ac4..ea5e362 100644 >> >> --- a/drivers/net/ethernet/mellanox/mlx4/main.c >> >> +++ b/drivers/net/ethernet/mellanox/mlx4/main.c >> >> @@ -1916,6 +1916,14 @@ static int mlx4_comm_check_offline(struct mlx4_dev *dev) >> >> (u32)(1 << COMM_CHAN_OFFLINE_OFFSET)); >> >> if (!offline_bit) >> >> return 0; >> >> + >> >> + /* If device removal has been requested, >> >> + * do not continue retrying. >> >> + */ >> >> + if (dev->persist->interface_state & >> >> + MLX4_INTERFACE_STATE_NOWAIT) >> >> + break; >> >> + >> >> /* There are cases as part of AER/Reset flow that PF needs >> >> * around 100 msec to load. We therefore sleep for 100 msec >> >> * to allow other tasks to make use of that CPU during this >> >> @@ -3930,6 +3938,9 @@ static void mlx4_remove_one(struct pci_dev *pdev) >> >> struct devlink *devlink = priv_to_devlink(priv); >> >> int active_vfs = 0; >> >> >> >> + if (mlx4_is_slave(dev)) >> >> + persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT; >> >> + >> >> mutex_lock(&persist->interface_state_mutex); >> >> persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; >> >> mutex_unlock(&persist->interface_state_mutex); >> >> diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h >> >> index 42da355..d49f11d 100644 >> >> --- a/include/linux/mlx4/device.h >> >> +++ b/include/linux/mlx4/device.h >> >> @@ -468,6 +468,7 @@ enum { >> >> MLX4_INTERFACE_STATE_UP = 1 << 0, >> >> MLX4_INTERFACE_STATE_DELETION = 1 << 1, >> >> MLX4_INTERFACE_STATE_SHUTDOWN = 1 << 2, >> >> + MLX4_INTERFACE_STATE_NOWAIT = 1 << 2, >> > >> > So, NOWAIT and SHUTDOWN are defined to the same value, so it might be >> > necessary to review it that is going to work as expected. Maybe doing >> > the shutdown removal is a path to consider (commit >> > b4353708f5a1c084fd73f1b6fd243b142157b173). >> > >> > Cascardo. >> > >> >> }; >> >> >> >> #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ >> >> -- >> >> 2.7.4 >> >> >> >> >> >> -- >> >> kernel-team mailing list >> >> kernel-team@lists.ubuntu.com >> >> https://lists.ubuntu.com/mailman/listinfo/kernel-team >> > >> > -- >> > kernel-team mailing list >> > kernel-team@lists.ubuntu.com >> > https://lists.ubuntu.com/mailman/listinfo/kernel-team
I have a Yakkety test kernel now available that included commit b4353708f5a1c084fd73f1b6fd243b142157b173 and commit 4cbe4dac82e423ecc9a0ba46af24a860853259f4. Having commit b4353708f5a1c0 removes MLX4_INTERFACE_STATE_SHUTDOWN and all references to it. It allows commit 4cbe4dac82e423ecc9a0ba46af24a860853259f4 to pick cleanly and not require a backport. Xenial did not require b4353708f5a(It reverts commit 9d769311805 which was added in v4.7-rc6) and it was a clean pick in Xenial. Zesty already has commit b4353708f5a. I think both commits should be added to Yakkety, not just 4cbe4dac82e4. I'll post a link to the v2 test kernel in the bug and resubmit a v2 SRU once everyone agrees thats the way to go. Thanks, Joe On 04/03/2017 02:35 PM, Joshua R. Poulson wrote: > Our friends at Mellanox have, we have been testing VF remove and add > as part of SR-IOV in Azure testing. Our regression tests include > on-premises Hyper-V as well. > > Thanks --jrp > > On Mon, Apr 3, 2017 at 6:32 AM, Thadeu Lima de Souza Cascardo > <cascardo@canonical.com> wrote: >> On Mon, Apr 03, 2017 at 05:11:34AM -0700, Joshua R. Poulson wrote: >>> We're engaged to test the driver in the intended scenario and code >>> path. We have a stable repro for the bug that this fixes. >>> >>> Thanks, --jrp >>> >> Is that on Yakkety, with this Yakkety backport as is? >> >> I am more concerned about the paths that would use SHUTDOWN, have you tested >> these as well? >> >> Thanks. >> Cascardo. >> >>> On Mon, Apr 3, 2017 at 2:26 AM, Thadeu Lima de Souza Cascardo >>> <cascardo@canonical.com> wrote: >>>> On Thu, Mar 30, 2017 at 04:35:59PM -0400, Joseph Salisbury wrote: >>>>> From: Jack Morgenstein <jackm@dev.mellanox.co.il> >>>>> >>>>> BugLink: http://bugs.launchpad.net/bugs/1672785 >>>>> >>>>> Some Hypervisors detach VFs from VMs by instantly causing an FLR event >>>>> to be generated for a VF. >>>>> >>>>> In the mlx4 case, this will cause that VF's comm channel to be disabled >>>>> before the VM has an opportunity to invoke the VF device's "shutdown" >>>>> method. >>>>> >>>>> For such Hypervisors, there is a race condition between the VF's >>>>> shutdown method and its internal-error detection/reset thread. >>>>> >>>>> The internal-error detection/reset thread (which runs every 5 seconds) also >>>>> detects a disabled comm channel. If the internal-error detection/reset >>>>> flow wins the race, we still get delays (while that flow tries repeatedly >>>>> to detect comm-channel recovery). >>>>> >>>>> The cited commit fixed the command timeout problem when the >>>>> internal-error detection/reset flow loses the race. >>>>> >>>>> This commit avoids the unneeded delays when the internal-error >>>>> detection/reset flow wins. >>>>> >>>>> Fixes: d585df1c5ccf ("net/mlx4_core: Avoid command timeouts during VF driver device shutdown") >>>>> Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il> >>>>> Reported-by: Simon Xiao <sixiao@microsoft.com> >>>>> Signed-off-by: Tariq Toukan <tariqt@mellanox.com> >>>>> Signed-off-by: David S. Miller <davem@davemloft.net> >>>>> (backported from commit 4cbe4dac82e423ecc9a0ba46af24a860853259f4) >>>>> Signed-off-by: Joseph Salisbury <joseph.salisbury@canonical.com> >>>>> --- >>>>> drivers/net/ethernet/mellanox/mlx4/cmd.c | 11 +++++++++++ >>>>> drivers/net/ethernet/mellanox/mlx4/main.c | 11 +++++++++++ >>>>> include/linux/mlx4/device.h | 1 + >>>>> 3 files changed, 23 insertions(+) >>>>> >>>>> diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c >>>>> index f04a423..be6906b 100644 >>>>> --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c >>>>> +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c >>>>> @@ -2278,6 +2278,17 @@ static int sync_toggles(struct mlx4_dev *dev) >>>>> rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)); >>>>> if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) { >>>>> /* PCI might be offline */ >>>>> + >>>>> + /* If device removal has been requested, >>>>> + * do not continue retrying. >>>>> + */ >>>>> + if (dev->persist->interface_state & >>>>> + MLX4_INTERFACE_STATE_NOWAIT) { >>>>> + mlx4_warn(dev, >>>>> + "communication channel is offline\n"); >>>>> + return -EIO; >>>>> + } >>>>> + >>>>> msleep(100); >>>>> wr_toggle = swab32(readl(&priv->mfunc.comm-> >>>>> slave_write)); >>>>> diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c >>>>> index 7183ac4..ea5e362 100644 >>>>> --- a/drivers/net/ethernet/mellanox/mlx4/main.c >>>>> +++ b/drivers/net/ethernet/mellanox/mlx4/main.c >>>>> @@ -1916,6 +1916,14 @@ static int mlx4_comm_check_offline(struct mlx4_dev *dev) >>>>> (u32)(1 << COMM_CHAN_OFFLINE_OFFSET)); >>>>> if (!offline_bit) >>>>> return 0; >>>>> + >>>>> + /* If device removal has been requested, >>>>> + * do not continue retrying. >>>>> + */ >>>>> + if (dev->persist->interface_state & >>>>> + MLX4_INTERFACE_STATE_NOWAIT) >>>>> + break; >>>>> + >>>>> /* There are cases as part of AER/Reset flow that PF needs >>>>> * around 100 msec to load. We therefore sleep for 100 msec >>>>> * to allow other tasks to make use of that CPU during this >>>>> @@ -3930,6 +3938,9 @@ static void mlx4_remove_one(struct pci_dev *pdev) >>>>> struct devlink *devlink = priv_to_devlink(priv); >>>>> int active_vfs = 0; >>>>> >>>>> + if (mlx4_is_slave(dev)) >>>>> + persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT; >>>>> + >>>>> mutex_lock(&persist->interface_state_mutex); >>>>> persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; >>>>> mutex_unlock(&persist->interface_state_mutex); >>>>> diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h >>>>> index 42da355..d49f11d 100644 >>>>> --- a/include/linux/mlx4/device.h >>>>> +++ b/include/linux/mlx4/device.h >>>>> @@ -468,6 +468,7 @@ enum { >>>>> MLX4_INTERFACE_STATE_UP = 1 << 0, >>>>> MLX4_INTERFACE_STATE_DELETION = 1 << 1, >>>>> MLX4_INTERFACE_STATE_SHUTDOWN = 1 << 2, >>>>> + MLX4_INTERFACE_STATE_NOWAIT = 1 << 2, >>>> So, NOWAIT and SHUTDOWN are defined to the same value, so it might be >>>> necessary to review it that is going to work as expected. Maybe doing >>>> the shutdown removal is a path to consider (commit >>>> b4353708f5a1c084fd73f1b6fd243b142157b173). >>>> >>>> Cascardo. >>>> >>>>> }; >>>>> >>>>> #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ >>>>> -- >>>>> 2.7.4 >>>>> >>>>> >>>>> -- >>>>> kernel-team mailing list >>>>> kernel-team@lists.ubuntu.com >>>>> https://lists.ubuntu.com/mailman/listinfo/kernel-team >>>> -- >>>> kernel-team mailing list >>>> kernel-team@lists.ubuntu.com >>>> https://lists.ubuntu.com/mailman/listinfo/kernel-team
I'll test it as soon as I see it, thanks! --jrp On Mon, Apr 3, 2017 at 6:45 AM, Joseph Salisbury <joseph.salisbury@canonical.com> wrote: > I have a Yakkety test kernel now available that included commit > b4353708f5a1c084fd73f1b6fd243b142157b173 and commit > 4cbe4dac82e423ecc9a0ba46af24a860853259f4. Having commit b4353708f5a1c0 > removes MLX4_INTERFACE_STATE_SHUTDOWN and all references to it. It > allows commit 4cbe4dac82e423ecc9a0ba46af24a860853259f4 to pick cleanly > and not require a backport. > > Xenial did not require b4353708f5a(It reverts commit 9d769311805 which > was added in v4.7-rc6) and it was a clean pick in Xenial. Zesty already > has commit b4353708f5a. I think both commits should be added to > Yakkety, not just 4cbe4dac82e4. > > I'll post a link to the v2 test kernel in the bug and resubmit a v2 SRU > once everyone agrees thats the way to go. > > Thanks, > > Joe > > > > On 04/03/2017 02:35 PM, Joshua R. Poulson wrote: >> Our friends at Mellanox have, we have been testing VF remove and add >> as part of SR-IOV in Azure testing. Our regression tests include >> on-premises Hyper-V as well. >> >> Thanks --jrp >> >> On Mon, Apr 3, 2017 at 6:32 AM, Thadeu Lima de Souza Cascardo >> <cascardo@canonical.com> wrote: >>> On Mon, Apr 03, 2017 at 05:11:34AM -0700, Joshua R. Poulson wrote: >>>> We're engaged to test the driver in the intended scenario and code >>>> path. We have a stable repro for the bug that this fixes. >>>> >>>> Thanks, --jrp >>>> >>> Is that on Yakkety, with this Yakkety backport as is? >>> >>> I am more concerned about the paths that would use SHUTDOWN, have you tested >>> these as well? >>> >>> Thanks. >>> Cascardo. >>> >>>> On Mon, Apr 3, 2017 at 2:26 AM, Thadeu Lima de Souza Cascardo >>>> <cascardo@canonical.com> wrote: >>>>> On Thu, Mar 30, 2017 at 04:35:59PM -0400, Joseph Salisbury wrote: >>>>>> From: Jack Morgenstein <jackm@dev.mellanox.co.il> >>>>>> >>>>>> BugLink: http://bugs.launchpad.net/bugs/1672785 >>>>>> >>>>>> Some Hypervisors detach VFs from VMs by instantly causing an FLR event >>>>>> to be generated for a VF. >>>>>> >>>>>> In the mlx4 case, this will cause that VF's comm channel to be disabled >>>>>> before the VM has an opportunity to invoke the VF device's "shutdown" >>>>>> method. >>>>>> >>>>>> For such Hypervisors, there is a race condition between the VF's >>>>>> shutdown method and its internal-error detection/reset thread. >>>>>> >>>>>> The internal-error detection/reset thread (which runs every 5 seconds) also >>>>>> detects a disabled comm channel. If the internal-error detection/reset >>>>>> flow wins the race, we still get delays (while that flow tries repeatedly >>>>>> to detect comm-channel recovery). >>>>>> >>>>>> The cited commit fixed the command timeout problem when the >>>>>> internal-error detection/reset flow loses the race. >>>>>> >>>>>> This commit avoids the unneeded delays when the internal-error >>>>>> detection/reset flow wins. >>>>>> >>>>>> Fixes: d585df1c5ccf ("net/mlx4_core: Avoid command timeouts during VF driver device shutdown") >>>>>> Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il> >>>>>> Reported-by: Simon Xiao <sixiao@microsoft.com> >>>>>> Signed-off-by: Tariq Toukan <tariqt@mellanox.com> >>>>>> Signed-off-by: David S. Miller <davem@davemloft.net> >>>>>> (backported from commit 4cbe4dac82e423ecc9a0ba46af24a860853259f4) >>>>>> Signed-off-by: Joseph Salisbury <joseph.salisbury@canonical.com> >>>>>> --- >>>>>> drivers/net/ethernet/mellanox/mlx4/cmd.c | 11 +++++++++++ >>>>>> drivers/net/ethernet/mellanox/mlx4/main.c | 11 +++++++++++ >>>>>> include/linux/mlx4/device.h | 1 + >>>>>> 3 files changed, 23 insertions(+) >>>>>> >>>>>> diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c >>>>>> index f04a423..be6906b 100644 >>>>>> --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c >>>>>> +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c >>>>>> @@ -2278,6 +2278,17 @@ static int sync_toggles(struct mlx4_dev *dev) >>>>>> rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)); >>>>>> if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) { >>>>>> /* PCI might be offline */ >>>>>> + >>>>>> + /* If device removal has been requested, >>>>>> + * do not continue retrying. >>>>>> + */ >>>>>> + if (dev->persist->interface_state & >>>>>> + MLX4_INTERFACE_STATE_NOWAIT) { >>>>>> + mlx4_warn(dev, >>>>>> + "communication channel is offline\n"); >>>>>> + return -EIO; >>>>>> + } >>>>>> + >>>>>> msleep(100); >>>>>> wr_toggle = swab32(readl(&priv->mfunc.comm-> >>>>>> slave_write)); >>>>>> diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c >>>>>> index 7183ac4..ea5e362 100644 >>>>>> --- a/drivers/net/ethernet/mellanox/mlx4/main.c >>>>>> +++ b/drivers/net/ethernet/mellanox/mlx4/main.c >>>>>> @@ -1916,6 +1916,14 @@ static int mlx4_comm_check_offline(struct mlx4_dev *dev) >>>>>> (u32)(1 << COMM_CHAN_OFFLINE_OFFSET)); >>>>>> if (!offline_bit) >>>>>> return 0; >>>>>> + >>>>>> + /* If device removal has been requested, >>>>>> + * do not continue retrying. >>>>>> + */ >>>>>> + if (dev->persist->interface_state & >>>>>> + MLX4_INTERFACE_STATE_NOWAIT) >>>>>> + break; >>>>>> + >>>>>> /* There are cases as part of AER/Reset flow that PF needs >>>>>> * around 100 msec to load. We therefore sleep for 100 msec >>>>>> * to allow other tasks to make use of that CPU during this >>>>>> @@ -3930,6 +3938,9 @@ static void mlx4_remove_one(struct pci_dev *pdev) >>>>>> struct devlink *devlink = priv_to_devlink(priv); >>>>>> int active_vfs = 0; >>>>>> >>>>>> + if (mlx4_is_slave(dev)) >>>>>> + persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT; >>>>>> + >>>>>> mutex_lock(&persist->interface_state_mutex); >>>>>> persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; >>>>>> mutex_unlock(&persist->interface_state_mutex); >>>>>> diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h >>>>>> index 42da355..d49f11d 100644 >>>>>> --- a/include/linux/mlx4/device.h >>>>>> +++ b/include/linux/mlx4/device.h >>>>>> @@ -468,6 +468,7 @@ enum { >>>>>> MLX4_INTERFACE_STATE_UP = 1 << 0, >>>>>> MLX4_INTERFACE_STATE_DELETION = 1 << 1, >>>>>> MLX4_INTERFACE_STATE_SHUTDOWN = 1 << 2, >>>>>> + MLX4_INTERFACE_STATE_NOWAIT = 1 << 2, >>>>> So, NOWAIT and SHUTDOWN are defined to the same value, so it might be >>>>> necessary to review it that is going to work as expected. Maybe doing >>>>> the shutdown removal is a path to consider (commit >>>>> b4353708f5a1c084fd73f1b6fd243b142157b173). >>>>> >>>>> Cascardo. >>>>> >>>>>> }; >>>>>> >>>>>> #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ >>>>>> -- >>>>>> 2.7.4 >>>>>> >>>>>> >>>>>> -- >>>>>> kernel-team mailing list >>>>>> kernel-team@lists.ubuntu.com >>>>>> https://lists.ubuntu.com/mailman/listinfo/kernel-team >>>>> -- >>>>> kernel-team mailing list >>>>> kernel-team@lists.ubuntu.com >>>>> https://lists.ubuntu.com/mailman/listinfo/kernel-team > > >
Thanks, Josh. An explanation of the new test kernel and a link can be found here: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1672785/comments/7 On 04/03/2017 03:03 PM, Joshua R. Poulson wrote: > I'll test it as soon as I see it, thanks! --jrp > > On Mon, Apr 3, 2017 at 6:45 AM, Joseph Salisbury > <joseph.salisbury@canonical.com> wrote: >> I have a Yakkety test kernel now available that included commit >> b4353708f5a1c084fd73f1b6fd243b142157b173 and commit >> 4cbe4dac82e423ecc9a0ba46af24a860853259f4. Having commit b4353708f5a1c0 >> removes MLX4_INTERFACE_STATE_SHUTDOWN and all references to it. It >> allows commit 4cbe4dac82e423ecc9a0ba46af24a860853259f4 to pick cleanly >> and not require a backport. >> >> Xenial did not require b4353708f5a(It reverts commit 9d769311805 which >> was added in v4.7-rc6) and it was a clean pick in Xenial. Zesty already >> has commit b4353708f5a. I think both commits should be added to >> Yakkety, not just 4cbe4dac82e4. >> >> I'll post a link to the v2 test kernel in the bug and resubmit a v2 SRU >> once everyone agrees thats the way to go. >> >> Thanks, >> >> Joe >> >> >> >> On 04/03/2017 02:35 PM, Joshua R. Poulson wrote: >>> Our friends at Mellanox have, we have been testing VF remove and add >>> as part of SR-IOV in Azure testing. Our regression tests include >>> on-premises Hyper-V as well. >>> >>> Thanks --jrp >>> >>> On Mon, Apr 3, 2017 at 6:32 AM, Thadeu Lima de Souza Cascardo >>> <cascardo@canonical.com> wrote: >>>> On Mon, Apr 03, 2017 at 05:11:34AM -0700, Joshua R. Poulson wrote: >>>>> We're engaged to test the driver in the intended scenario and code >>>>> path. We have a stable repro for the bug that this fixes. >>>>> >>>>> Thanks, --jrp >>>>> >>>> Is that on Yakkety, with this Yakkety backport as is? >>>> >>>> I am more concerned about the paths that would use SHUTDOWN, have you tested >>>> these as well? >>>> >>>> Thanks. >>>> Cascardo. >>>> >>>>> On Mon, Apr 3, 2017 at 2:26 AM, Thadeu Lima de Souza Cascardo >>>>> <cascardo@canonical.com> wrote: >>>>>> On Thu, Mar 30, 2017 at 04:35:59PM -0400, Joseph Salisbury wrote: >>>>>>> From: Jack Morgenstein <jackm@dev.mellanox.co.il> >>>>>>> >>>>>>> BugLink: http://bugs.launchpad.net/bugs/1672785 >>>>>>> >>>>>>> Some Hypervisors detach VFs from VMs by instantly causing an FLR event >>>>>>> to be generated for a VF. >>>>>>> >>>>>>> In the mlx4 case, this will cause that VF's comm channel to be disabled >>>>>>> before the VM has an opportunity to invoke the VF device's "shutdown" >>>>>>> method. >>>>>>> >>>>>>> For such Hypervisors, there is a race condition between the VF's >>>>>>> shutdown method and its internal-error detection/reset thread. >>>>>>> >>>>>>> The internal-error detection/reset thread (which runs every 5 seconds) also >>>>>>> detects a disabled comm channel. If the internal-error detection/reset >>>>>>> flow wins the race, we still get delays (while that flow tries repeatedly >>>>>>> to detect comm-channel recovery). >>>>>>> >>>>>>> The cited commit fixed the command timeout problem when the >>>>>>> internal-error detection/reset flow loses the race. >>>>>>> >>>>>>> This commit avoids the unneeded delays when the internal-error >>>>>>> detection/reset flow wins. >>>>>>> >>>>>>> Fixes: d585df1c5ccf ("net/mlx4_core: Avoid command timeouts during VF driver device shutdown") >>>>>>> Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il> >>>>>>> Reported-by: Simon Xiao <sixiao@microsoft.com> >>>>>>> Signed-off-by: Tariq Toukan <tariqt@mellanox.com> >>>>>>> Signed-off-by: David S. Miller <davem@davemloft.net> >>>>>>> (backported from commit 4cbe4dac82e423ecc9a0ba46af24a860853259f4) >>>>>>> Signed-off-by: Joseph Salisbury <joseph.salisbury@canonical.com> >>>>>>> --- >>>>>>> drivers/net/ethernet/mellanox/mlx4/cmd.c | 11 +++++++++++ >>>>>>> drivers/net/ethernet/mellanox/mlx4/main.c | 11 +++++++++++ >>>>>>> include/linux/mlx4/device.h | 1 + >>>>>>> 3 files changed, 23 insertions(+) >>>>>>> >>>>>>> diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c >>>>>>> index f04a423..be6906b 100644 >>>>>>> --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c >>>>>>> +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c >>>>>>> @@ -2278,6 +2278,17 @@ static int sync_toggles(struct mlx4_dev *dev) >>>>>>> rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)); >>>>>>> if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) { >>>>>>> /* PCI might be offline */ >>>>>>> + >>>>>>> + /* If device removal has been requested, >>>>>>> + * do not continue retrying. >>>>>>> + */ >>>>>>> + if (dev->persist->interface_state & >>>>>>> + MLX4_INTERFACE_STATE_NOWAIT) { >>>>>>> + mlx4_warn(dev, >>>>>>> + "communication channel is offline\n"); >>>>>>> + return -EIO; >>>>>>> + } >>>>>>> + >>>>>>> msleep(100); >>>>>>> wr_toggle = swab32(readl(&priv->mfunc.comm-> >>>>>>> slave_write)); >>>>>>> diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c >>>>>>> index 7183ac4..ea5e362 100644 >>>>>>> --- a/drivers/net/ethernet/mellanox/mlx4/main.c >>>>>>> +++ b/drivers/net/ethernet/mellanox/mlx4/main.c >>>>>>> @@ -1916,6 +1916,14 @@ static int mlx4_comm_check_offline(struct mlx4_dev *dev) >>>>>>> (u32)(1 << COMM_CHAN_OFFLINE_OFFSET)); >>>>>>> if (!offline_bit) >>>>>>> return 0; >>>>>>> + >>>>>>> + /* If device removal has been requested, >>>>>>> + * do not continue retrying. >>>>>>> + */ >>>>>>> + if (dev->persist->interface_state & >>>>>>> + MLX4_INTERFACE_STATE_NOWAIT) >>>>>>> + break; >>>>>>> + >>>>>>> /* There are cases as part of AER/Reset flow that PF needs >>>>>>> * around 100 msec to load. We therefore sleep for 100 msec >>>>>>> * to allow other tasks to make use of that CPU during this >>>>>>> @@ -3930,6 +3938,9 @@ static void mlx4_remove_one(struct pci_dev *pdev) >>>>>>> struct devlink *devlink = priv_to_devlink(priv); >>>>>>> int active_vfs = 0; >>>>>>> >>>>>>> + if (mlx4_is_slave(dev)) >>>>>>> + persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT; >>>>>>> + >>>>>>> mutex_lock(&persist->interface_state_mutex); >>>>>>> persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; >>>>>>> mutex_unlock(&persist->interface_state_mutex); >>>>>>> diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h >>>>>>> index 42da355..d49f11d 100644 >>>>>>> --- a/include/linux/mlx4/device.h >>>>>>> +++ b/include/linux/mlx4/device.h >>>>>>> @@ -468,6 +468,7 @@ enum { >>>>>>> MLX4_INTERFACE_STATE_UP = 1 << 0, >>>>>>> MLX4_INTERFACE_STATE_DELETION = 1 << 1, >>>>>>> MLX4_INTERFACE_STATE_SHUTDOWN = 1 << 2, >>>>>>> + MLX4_INTERFACE_STATE_NOWAIT = 1 << 2, >>>>>> So, NOWAIT and SHUTDOWN are defined to the same value, so it might be >>>>>> necessary to review it that is going to work as expected. Maybe doing >>>>>> the shutdown removal is a path to consider (commit >>>>>> b4353708f5a1c084fd73f1b6fd243b142157b173). >>>>>> >>>>>> Cascardo. >>>>>> >>>>>>> }; >>>>>>> >>>>>>> #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ >>>>>>> -- >>>>>>> 2.7.4 >>>>>>> >>>>>>> >>>>>>> -- >>>>>>> kernel-team mailing list >>>>>>> kernel-team@lists.ubuntu.com >>>>>>> https://lists.ubuntu.com/mailman/listinfo/kernel-team >>>>>> -- >>>>>> kernel-team mailing list >>>>>> kernel-team@lists.ubuntu.com >>>>>> https://lists.ubuntu.com/mailman/listinfo/kernel-team >> >>
It's in my queue. --jrp On Mon, Apr 3, 2017 at 7:15 AM, Joseph Salisbury <joseph.salisbury@canonical.com> wrote: > Thanks, Josh. An explanation of the new test kernel and a link can be > found here: > https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1672785/comments/7 > > On 04/03/2017 03:03 PM, Joshua R. Poulson wrote: >> I'll test it as soon as I see it, thanks! --jrp >> >> On Mon, Apr 3, 2017 at 6:45 AM, Joseph Salisbury >> <joseph.salisbury@canonical.com> wrote: >>> I have a Yakkety test kernel now available that included commit >>> b4353708f5a1c084fd73f1b6fd243b142157b173 and commit >>> 4cbe4dac82e423ecc9a0ba46af24a860853259f4. Having commit b4353708f5a1c0 >>> removes MLX4_INTERFACE_STATE_SHUTDOWN and all references to it. It >>> allows commit 4cbe4dac82e423ecc9a0ba46af24a860853259f4 to pick cleanly >>> and not require a backport. >>> >>> Xenial did not require b4353708f5a(It reverts commit 9d769311805 which >>> was added in v4.7-rc6) and it was a clean pick in Xenial. Zesty already >>> has commit b4353708f5a. I think both commits should be added to >>> Yakkety, not just 4cbe4dac82e4. >>> >>> I'll post a link to the v2 test kernel in the bug and resubmit a v2 SRU >>> once everyone agrees thats the way to go. >>> >>> Thanks, >>> >>> Joe >>> >>> >>> >>> On 04/03/2017 02:35 PM, Joshua R. Poulson wrote: >>>> Our friends at Mellanox have, we have been testing VF remove and add >>>> as part of SR-IOV in Azure testing. Our regression tests include >>>> on-premises Hyper-V as well. >>>> >>>> Thanks --jrp >>>> >>>> On Mon, Apr 3, 2017 at 6:32 AM, Thadeu Lima de Souza Cascardo >>>> <cascardo@canonical.com> wrote: >>>>> On Mon, Apr 03, 2017 at 05:11:34AM -0700, Joshua R. Poulson wrote: >>>>>> We're engaged to test the driver in the intended scenario and code >>>>>> path. We have a stable repro for the bug that this fixes. >>>>>> >>>>>> Thanks, --jrp >>>>>> >>>>> Is that on Yakkety, with this Yakkety backport as is? >>>>> >>>>> I am more concerned about the paths that would use SHUTDOWN, have you tested >>>>> these as well? >>>>> >>>>> Thanks. >>>>> Cascardo. >>>>> >>>>>> On Mon, Apr 3, 2017 at 2:26 AM, Thadeu Lima de Souza Cascardo >>>>>> <cascardo@canonical.com> wrote: >>>>>>> On Thu, Mar 30, 2017 at 04:35:59PM -0400, Joseph Salisbury wrote: >>>>>>>> From: Jack Morgenstein <jackm@dev.mellanox.co.il> >>>>>>>> >>>>>>>> BugLink: http://bugs.launchpad.net/bugs/1672785 >>>>>>>> >>>>>>>> Some Hypervisors detach VFs from VMs by instantly causing an FLR event >>>>>>>> to be generated for a VF. >>>>>>>> >>>>>>>> In the mlx4 case, this will cause that VF's comm channel to be disabled >>>>>>>> before the VM has an opportunity to invoke the VF device's "shutdown" >>>>>>>> method. >>>>>>>> >>>>>>>> For such Hypervisors, there is a race condition between the VF's >>>>>>>> shutdown method and its internal-error detection/reset thread. >>>>>>>> >>>>>>>> The internal-error detection/reset thread (which runs every 5 seconds) also >>>>>>>> detects a disabled comm channel. If the internal-error detection/reset >>>>>>>> flow wins the race, we still get delays (while that flow tries repeatedly >>>>>>>> to detect comm-channel recovery). >>>>>>>> >>>>>>>> The cited commit fixed the command timeout problem when the >>>>>>>> internal-error detection/reset flow loses the race. >>>>>>>> >>>>>>>> This commit avoids the unneeded delays when the internal-error >>>>>>>> detection/reset flow wins. >>>>>>>> >>>>>>>> Fixes: d585df1c5ccf ("net/mlx4_core: Avoid command timeouts during VF driver device shutdown") >>>>>>>> Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il> >>>>>>>> Reported-by: Simon Xiao <sixiao@microsoft.com> >>>>>>>> Signed-off-by: Tariq Toukan <tariqt@mellanox.com> >>>>>>>> Signed-off-by: David S. Miller <davem@davemloft.net> >>>>>>>> (backported from commit 4cbe4dac82e423ecc9a0ba46af24a860853259f4) >>>>>>>> Signed-off-by: Joseph Salisbury <joseph.salisbury@canonical.com> >>>>>>>> --- >>>>>>>> drivers/net/ethernet/mellanox/mlx4/cmd.c | 11 +++++++++++ >>>>>>>> drivers/net/ethernet/mellanox/mlx4/main.c | 11 +++++++++++ >>>>>>>> include/linux/mlx4/device.h | 1 + >>>>>>>> 3 files changed, 23 insertions(+) >>>>>>>> >>>>>>>> diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c >>>>>>>> index f04a423..be6906b 100644 >>>>>>>> --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c >>>>>>>> +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c >>>>>>>> @@ -2278,6 +2278,17 @@ static int sync_toggles(struct mlx4_dev *dev) >>>>>>>> rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)); >>>>>>>> if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) { >>>>>>>> /* PCI might be offline */ >>>>>>>> + >>>>>>>> + /* If device removal has been requested, >>>>>>>> + * do not continue retrying. >>>>>>>> + */ >>>>>>>> + if (dev->persist->interface_state & >>>>>>>> + MLX4_INTERFACE_STATE_NOWAIT) { >>>>>>>> + mlx4_warn(dev, >>>>>>>> + "communication channel is offline\n"); >>>>>>>> + return -EIO; >>>>>>>> + } >>>>>>>> + >>>>>>>> msleep(100); >>>>>>>> wr_toggle = swab32(readl(&priv->mfunc.comm-> >>>>>>>> slave_write)); >>>>>>>> diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c >>>>>>>> index 7183ac4..ea5e362 100644 >>>>>>>> --- a/drivers/net/ethernet/mellanox/mlx4/main.c >>>>>>>> +++ b/drivers/net/ethernet/mellanox/mlx4/main.c >>>>>>>> @@ -1916,6 +1916,14 @@ static int mlx4_comm_check_offline(struct mlx4_dev *dev) >>>>>>>> (u32)(1 << COMM_CHAN_OFFLINE_OFFSET)); >>>>>>>> if (!offline_bit) >>>>>>>> return 0; >>>>>>>> + >>>>>>>> + /* If device removal has been requested, >>>>>>>> + * do not continue retrying. >>>>>>>> + */ >>>>>>>> + if (dev->persist->interface_state & >>>>>>>> + MLX4_INTERFACE_STATE_NOWAIT) >>>>>>>> + break; >>>>>>>> + >>>>>>>> /* There are cases as part of AER/Reset flow that PF needs >>>>>>>> * around 100 msec to load. We therefore sleep for 100 msec >>>>>>>> * to allow other tasks to make use of that CPU during this >>>>>>>> @@ -3930,6 +3938,9 @@ static void mlx4_remove_one(struct pci_dev *pdev) >>>>>>>> struct devlink *devlink = priv_to_devlink(priv); >>>>>>>> int active_vfs = 0; >>>>>>>> >>>>>>>> + if (mlx4_is_slave(dev)) >>>>>>>> + persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT; >>>>>>>> + >>>>>>>> mutex_lock(&persist->interface_state_mutex); >>>>>>>> persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; >>>>>>>> mutex_unlock(&persist->interface_state_mutex); >>>>>>>> diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h >>>>>>>> index 42da355..d49f11d 100644 >>>>>>>> --- a/include/linux/mlx4/device.h >>>>>>>> +++ b/include/linux/mlx4/device.h >>>>>>>> @@ -468,6 +468,7 @@ enum { >>>>>>>> MLX4_INTERFACE_STATE_UP = 1 << 0, >>>>>>>> MLX4_INTERFACE_STATE_DELETION = 1 << 1, >>>>>>>> MLX4_INTERFACE_STATE_SHUTDOWN = 1 << 2, >>>>>>>> + MLX4_INTERFACE_STATE_NOWAIT = 1 << 2, >>>>>>> So, NOWAIT and SHUTDOWN are defined to the same value, so it might be >>>>>>> necessary to review it that is going to work as expected. Maybe doing >>>>>>> the shutdown removal is a path to consider (commit >>>>>>> b4353708f5a1c084fd73f1b6fd243b142157b173). >>>>>>> >>>>>>> Cascardo. >>>>>>> >>>>>>>> }; >>>>>>>> >>>>>>>> #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ >>>>>>>> -- >>>>>>>> 2.7.4 >>>>>>>> >>>>>>>> >>>>>>>> -- >>>>>>>> kernel-team mailing list >>>>>>>> kernel-team@lists.ubuntu.com >>>>>>>> https://lists.ubuntu.com/mailman/listinfo/kernel-team >>>>>>> -- >>>>>>> kernel-team mailing list >>>>>>> kernel-team@lists.ubuntu.com >>>>>>> https://lists.ubuntu.com/mailman/listinfo/kernel-team >>> >>> >
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index f04a423..be6906b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2278,6 +2278,17 @@ static int sync_toggles(struct mlx4_dev *dev) rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)); if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) { /* PCI might be offline */ + + /* If device removal has been requested, + * do not continue retrying. + */ + if (dev->persist->interface_state & + MLX4_INTERFACE_STATE_NOWAIT) { + mlx4_warn(dev, + "communication channel is offline\n"); + return -EIO; + } + msleep(100); wr_toggle = swab32(readl(&priv->mfunc.comm-> slave_write)); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 7183ac4..ea5e362 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1916,6 +1916,14 @@ static int mlx4_comm_check_offline(struct mlx4_dev *dev) (u32)(1 << COMM_CHAN_OFFLINE_OFFSET)); if (!offline_bit) return 0; + + /* If device removal has been requested, + * do not continue retrying. + */ + if (dev->persist->interface_state & + MLX4_INTERFACE_STATE_NOWAIT) + break; + /* There are cases as part of AER/Reset flow that PF needs * around 100 msec to load. We therefore sleep for 100 msec * to allow other tasks to make use of that CPU during this @@ -3930,6 +3938,9 @@ static void mlx4_remove_one(struct pci_dev *pdev) struct devlink *devlink = priv_to_devlink(priv); int active_vfs = 0; + if (mlx4_is_slave(dev)) + persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT; + mutex_lock(&persist->interface_state_mutex); persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; mutex_unlock(&persist->interface_state_mutex); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 42da355..d49f11d 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -468,6 +468,7 @@ enum { MLX4_INTERFACE_STATE_UP = 1 << 0, MLX4_INTERFACE_STATE_DELETION = 1 << 1, MLX4_INTERFACE_STATE_SHUTDOWN = 1 << 2, + MLX4_INTERFACE_STATE_NOWAIT = 1 << 2, }; #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \