diff mbox

[PATCHv3,net-next-2.6,2/3] qlcnic: Take FW dump via ethtool

Message ID 1305154448-9687-4-git-send-email-anirban.chakraborty@qlogic.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Anirban Chakraborty May 11, 2011, 10:54 p.m. UTC
Driver checks if the previous dump has been cleared before taking the dump.
It doesn't take the dump if it is not cleared.

Changes from v2:
Added lock to protect dump data structures from being mangled while
dumping or setting them via ethtool.

Signed-off-by: Anirban Chakraborty <anirban.chakraborty@qlogic.com>
---
 drivers/net/qlcnic/qlcnic_ethtool.c |   81 +++++++++++++++++++++++++++++++++++
 1 files changed, 81 insertions(+), 0 deletions(-)

Comments

Ben Hutchings May 12, 2011, 5:27 p.m. UTC | #1
On Wed, 2011-05-11 at 15:54 -0700, Anirban Chakraborty wrote:
> Driver checks if the previous dump has been cleared before taking the dump.
> It doesn't take the dump if it is not cleared.
> 
> Changes from v2:
> Added lock to protect dump data structures from being mangled while
> dumping or setting them via ethtool.

Unfortunately it still seems to be possible for the dump length to
change between the ethtool core calling qlcnic_get_dump_flag() and
qlcnic_get_dump_data().

So I think qlcnic_get_dump_data() will need to double-check the length
after taking the internal lock:

[...]
> +static int
> +qlcnic_get_dump_data(struct net_device *netdev, struct ethtool_dump *dump,
> +                       void *buffer)
> +{
> +       int i, copy_sz;
> +       u32 *hdr_ptr, *data;
> +       struct qlcnic_adapter *adapter = netdev_priv(netdev);
> +       struct qlcnic_fw_dump *fw_dump = &adapter->ahw->fw_dump;
> +
> +       if (qlcnic_api_lock(adapter))
> +               return -EIO;
[...]

	if (dump->len < fw_dump->tmpl_hdr->size + fw_dump->size) {
		qlcnic_api_unlock(adapter);
		return -EINVAL;
	}

I'm not sure about the error code... and I'm really not happy about the
need to check lengths in both the ethtool core and the driver.

Can't you change the function that actually makes a dump to acquire the
RTNL lock?  (You'll need to do that *before* acquiring the driver's own
lock.)

Ben.
Anirban Chakraborty May 12, 2011, 6:53 p.m. UTC | #2
On May 12, 2011, at 10:27 AM, Ben Hutchings wrote:

> On Wed, 2011-05-11 at 15:54 -0700, Anirban Chakraborty wrote:
>> Driver checks if the previous dump has been cleared before taking the dump.
>> It doesn't take the dump if it is not cleared.
>> 
>> Changes from v2:
>> Added lock to protect dump data structures from being mangled while
>> dumping or setting them via ethtool.
> 
> Unfortunately it still seems to be possible for the dump length to
> change between the ethtool core calling qlcnic_get_dump_flag() and
> qlcnic_get_dump_data().

dump length is serialized via the driver lock. dump length is a static entity for a given capture mask
and it can only be changed when there is a different capture mask set in the driver (via calling
set_dump() from ethtool core).
Actual dump size is determined during the initial steps of FW dump which takes the driver lock
to start with. So, I am not sure how the dump length could be changed between the calls to
get_dump_flag and get_dump_data from within the ethtool core without a call to set_dump()
in between.

> 
> So I think qlcnic_get_dump_data() will need to double-check the length
> after taking the internal lock:
> 
> [...]
>> +static int
>> +qlcnic_get_dump_data(struct net_device *netdev, struct ethtool_dump *dump,
>> +                       void *buffer)
>> +{
>> +       int i, copy_sz;
>> +       u32 *hdr_ptr, *data;
>> +       struct qlcnic_adapter *adapter = netdev_priv(netdev);
>> +       struct qlcnic_fw_dump *fw_dump = &adapter->ahw->fw_dump;
>> +
>> +       if (qlcnic_api_lock(adapter))
>> +               return -EIO;
> [...]
> 
> 	if (dump->len < fw_dump->tmpl_hdr->size + fw_dump->size) {
> 		qlcnic_api_unlock(adapter);
> 		return -EINVAL;
> 	}
> 
> I'm not sure about the error code... and I'm really not happy about the
> need to check lengths in both the ethtool core and the driver.
I can put the check in here but don't think it is required really.

> 
> Can't you change the function that actually makes a dump to acquire the
> RTNL lock?  (You'll need to do that *before* acquiring the driver's own
> lock.)
We can't do that because the driver lock is taken at much higher level where it does
some hardware specific things even before attempting to take FW dump.

Thanks.
-Anirban



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ben Hutchings May 12, 2011, 7:18 p.m. UTC | #3
On Thu, 2011-05-12 at 11:53 -0700, Anirban Chakraborty wrote:
> On May 12, 2011, at 10:27 AM, Ben Hutchings wrote:
> 
> > On Wed, 2011-05-11 at 15:54 -0700, Anirban Chakraborty wrote:
> >> Driver checks if the previous dump has been cleared before taking the dump.
> >> It doesn't take the dump if it is not cleared.
> >> 
> >> Changes from v2:
> >> Added lock to protect dump data structures from being mangled while
> >> dumping or setting them via ethtool.
> > 
> > Unfortunately it still seems to be possible for the dump length to
> > change between the ethtool core calling qlcnic_get_dump_flag() and
> > qlcnic_get_dump_data().
> 
> dump length is serialized via the driver lock. dump length is a static
> entity for a given capture mask and it can only be changed when there
> is a different capture mask set in the driver (via calling set_dump()
> from ethtool core).

OK.

> Actual dump size is determined during the initial steps of FW dump
> which takes the driver lock to start with. So, I am not sure how the
> dump length could be changed between the calls to get_dump_flag and
> get_dump_data from within the ethtool core without a call to
> set_dump() in between.
[...]

What prevents this sequence:

1. Driver detects firmware dump is required, and creates the dump
(length L1).
2. User changes firmware dump flags through ethtool.
3. User starts to save firmware dump through ethtool:
   a. ethtool utility reads dump length (= L1) and allocates user buffer
   b. ethtool utility reads dump:
   c. ethtool core reads dump length (L1) and allocates kernel buffer
4. Meanwhile, driver detects firmware dump is required again, and
creates a new dump (length L2, since dump flags changed)
5. (Continuation of 3)
   d. ethtool core calls driver to read firmware dump
   e. Driver copies new dump (length L2) into buffer of length L1

Ben.
Anirban Chakraborty May 12, 2011, 8:54 p.m. UTC | #4
On May 12, 2011, at 12:18 PM, Ben Hutchings wrote:

> On Thu, 2011-05-12 at 11:53 -0700, Anirban Chakraborty wrote:
>> On May 12, 2011, at 10:27 AM, Ben Hutchings wrote:
>> 
>>> On Wed, 2011-05-11 at 15:54 -0700, Anirban Chakraborty wrote:
>>>> Driver checks if the previous dump has been cleared before taking the dump.
>>>> It doesn't take the dump if it is not cleared.
>>>> 
>>>> Changes from v2:
>>>> Added lock to protect dump data structures from being mangled while
>>>> dumping or setting them via ethtool.
>>> 
>>> Unfortunately it still seems to be possible for the dump length to
>>> change between the ethtool core calling qlcnic_get_dump_flag() and
>>> qlcnic_get_dump_data().
>> 
>> dump length is serialized via the driver lock. dump length is a static
>> entity for a given capture mask and it can only be changed when there
>> is a different capture mask set in the driver (via calling set_dump()
>> from ethtool core).
> 
> OK.
> 
>> Actual dump size is determined during the initial steps of FW dump
>> which takes the driver lock to start with. So, I am not sure how the
>> dump length could be changed between the calls to get_dump_flag and
>> get_dump_data from within the ethtool core without a call to
>> set_dump() in between.
> [...]
> 
> What prevents this sequence:
> 
> 1. Driver detects firmware dump is required, and creates the dump
> (length L1).
> 2. User changes firmware dump flags through ethtool.
> 3. User starts to save firmware dump through ethtool:
>   a. ethtool utility reads dump length (= L1) and allocates user buffer
>   b. ethtool utility reads dump:
>   c. ethtool core reads dump length (L1) and allocates kernel buffer
> 4. Meanwhile, driver detects firmware dump is required again, and
> creates a new dump (length L2, since dump flags changed)

This step won't happen as driver ensures that if a dump is taken that is not
cleared yet by ethtool utility, it is not going to take any further dump. So, until
the get_dump_data() has been called to fetch the dump data, changing dump
flag (and hence dump size) will not have any effect.
In 3 above, ethtool core created a buffer of L1 size but hasn't yet called 
get_dump_data() of the driver, so driver is still holding onto its previously
dumped data even though capture mask has been changed and the driver
encountered a situation where it ought to take the dump.

-Anirban
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ben Hutchings May 12, 2011, 9:02 p.m. UTC | #5
On Thu, 2011-05-12 at 13:54 -0700, Anirban Chakraborty wrote:
> On May 12, 2011, at 12:18 PM, Ben Hutchings wrote:
[...]
> > What prevents this sequence:
> > 
> > 1. Driver detects firmware dump is required, and creates the dump
> > (length L1).
> > 2. User changes firmware dump flags through ethtool.
> > 3. User starts to save firmware dump through ethtool:
> >   a. ethtool utility reads dump length (= L1) and allocates user buffer
> >   b. ethtool utility reads dump:
> >   c. ethtool core reads dump length (L1) and allocates kernel buffer
> > 4. Meanwhile, driver detects firmware dump is required again, and
> > creates a new dump (length L2, since dump flags changed)
> 
> This step won't happen as driver ensures that if a dump is taken that is not
> cleared yet by ethtool utility, it is not going to take any further dump. So, until
> the get_dump_data() has been called to fetch the dump data, changing dump
> flag (and hence dump size) will not have any effect.
> In 3 above, ethtool core created a buffer of L1 size but hasn't yet called 
> get_dump_data() of the driver, so driver is still holding onto its previously
> dumped data even though capture mask has been changed and the driver
> encountered a situation where it ought to take the dump.

OK, that sounds safe.

Ben.
diff mbox

Patch

diff --git a/drivers/net/qlcnic/qlcnic_ethtool.c b/drivers/net/qlcnic/qlcnic_ethtool.c
index c541461..9efc690 100644
--- a/drivers/net/qlcnic/qlcnic_ethtool.c
+++ b/drivers/net/qlcnic/qlcnic_ethtool.c
@@ -965,6 +965,84 @@  static void qlcnic_set_msglevel(struct net_device *netdev, u32 msglvl)
 	adapter->msg_enable = msglvl;
 }
 
+static int
+qlcnic_get_dump_flag(struct net_device *netdev, struct ethtool_dump *dump)
+{
+	struct qlcnic_adapter *adapter = netdev_priv(netdev);
+	struct qlcnic_fw_dump *fw_dump = &adapter->ahw->fw_dump;
+
+	dump->len = fw_dump->tmpl_hdr->size + fw_dump->size;
+	dump->flag = fw_dump->tmpl_hdr->drv_cap_mask;
+	dump->version = adapter->fw_version;
+	return 0;
+}
+
+static int
+qlcnic_get_dump_data(struct net_device *netdev, struct ethtool_dump *dump,
+			void *buffer)
+{
+	int i, copy_sz;
+	u32 *hdr_ptr, *data;
+	struct qlcnic_adapter *adapter = netdev_priv(netdev);
+	struct qlcnic_fw_dump *fw_dump = &adapter->ahw->fw_dump;
+
+	if (qlcnic_api_lock(adapter))
+		return -EIO;
+	if (!fw_dump->clr) {
+		netdev_info(netdev, "Dump not available\n");
+		qlcnic_api_unlock(adapter);
+		return -EINVAL;
+	}
+	/* Copy template header first */
+	copy_sz = fw_dump->tmpl_hdr->size;
+	hdr_ptr = (u32 *) fw_dump->tmpl_hdr;
+	data = (u32 *) buffer;
+	for (i = 0; i < copy_sz/sizeof(u32); i++)
+		*data++ = cpu_to_le32(*hdr_ptr++);
+
+	/* Copy captured dump data */
+	memcpy(buffer + copy_sz, fw_dump->data, fw_dump->size);
+	dump->len = copy_sz + fw_dump->size;
+	dump->flag = fw_dump->tmpl_hdr->drv_cap_mask;
+
+	/* Free dump area once data has been captured */
+	vfree(fw_dump->data);
+	fw_dump->data = NULL;
+	fw_dump->clr = 0;
+	qlcnic_api_unlock(adapter);
+
+	return 0;
+}
+
+static int
+qlcnic_set_dump(struct net_device *netdev, struct ethtool_dump *val)
+{
+	int ret = 0;
+	struct qlcnic_adapter *adapter = netdev_priv(netdev);
+	struct qlcnic_fw_dump *fw_dump = &adapter->ahw->fw_dump;
+
+	if (val->flag == QLCNIC_FORCE_FW_DUMP_KEY) {
+		netdev_info(netdev, "Forcing a FW dump\n");
+		qlcnic_dev_request_reset(adapter);
+	} else {
+		if (val->flag > QLCNIC_DUMP_MASK_MAX ||
+			val->flag < QLCNIC_DUMP_MASK_MIN) {
+				netdev_info(netdev,
+				"Invalid dump level: 0x%x\n", val->flag);
+				ret = -EINVAL;
+				goto out;
+		}
+		if (qlcnic_api_lock(adapter))
+			return -EIO;
+		fw_dump->tmpl_hdr->drv_cap_mask = val->flag & 0xff;
+		qlcnic_api_unlock(adapter);
+		netdev_info(netdev, "Driver mask changed to: 0x%x\n",
+			fw_dump->tmpl_hdr->drv_cap_mask);
+	}
+out:
+	return ret;
+}
+
 const struct ethtool_ops qlcnic_ethtool_ops = {
 	.get_settings = qlcnic_get_settings,
 	.set_settings = qlcnic_set_settings,
@@ -991,4 +1069,7 @@  const struct ethtool_ops qlcnic_ethtool_ops = {
 	.set_phys_id = qlcnic_set_led,
 	.set_msglevel = qlcnic_set_msglevel,
 	.get_msglevel = qlcnic_get_msglevel,
+	.get_dump_flag = qlcnic_get_dump_flag,
+	.get_dump_data = qlcnic_get_dump_data,
+	.set_dump = qlcnic_set_dump,
 };