diff mbox

[v2,4/4] bgmac: fix DMA rx corruption

Message ID 1428848565-94187-4-git-send-email-nbd@openwrt.org
State Superseded, archived
Delegated to: David Miller
Headers show

Commit Message

Felix Fietkau April 12, 2015, 2:22 p.m. UTC
The driver needs to inform the hardware about the first invalid (not yet
filled) rx slot, by writing its DMA descriptor pointer offset to the
BGMAC_DMA_RX_INDEX register.

This register was set to a value exceeding the rx ring size, effectively
allowing the hardware constant access to the full ring, regardless of
which slots are initialized.

Fix this by updating the register in bgmac_dma_rx_setup_desc.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
---
 drivers/net/ethernet/broadcom/bgmac.c | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

Comments

Eric Dumazet April 12, 2015, 6:09 p.m. UTC | #1
On Sun, 2015-04-12 at 16:22 +0200, Felix Fietkau wrote:
> The driver needs to inform the hardware about the first invalid (not yet
> filled) rx slot, by writing its DMA descriptor pointer offset to the
> BGMAC_DMA_RX_INDEX register.
> 
> This register was set to a value exceeding the rx ring size, effectively
> allowing the hardware constant access to the full ring, regardless of
> which slots are initialized.
> 
> Fix this by updating the register in bgmac_dma_rx_setup_desc.
> 
> Signed-off-by: Felix Fietkau <nbd@openwrt.org>
> ---
>  drivers/net/ethernet/broadcom/bgmac.c | 27 ++++++++++++++++++---------
>  1 file changed, 18 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
> index e332de8..67993d7 100644
> --- a/drivers/net/ethernet/broadcom/bgmac.c
> +++ b/drivers/net/ethernet/broadcom/bgmac.c
> @@ -362,6 +362,16 @@ static int bgmac_dma_rx_skb_for_slot(struct bgmac *bgmac,
>  	return 0;
>  }
>  
> +static void bgmac_dma_rx_update_index(struct bgmac *bgmac,
> +				      struct bgmac_dma_ring *ring)
> +{
> +	dma_wmb();
> +
> +	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_INDEX,
> +		    ring->index_base +
> +		    ring->end * sizeof(struct bgmac_dma_desc));
> +}
> +
>  static void bgmac_dma_rx_setup_desc(struct bgmac *bgmac,
>  				    struct bgmac_dma_ring *ring, int desc_idx)
>  {
> @@ -380,6 +390,8 @@ static void bgmac_dma_rx_setup_desc(struct bgmac *bgmac,
>  	dma_desc->addr_high = cpu_to_le32(upper_32_bits(ring->slots[desc_idx].dma_addr));
>  	dma_desc->ctl0 = cpu_to_le32(ctl0);
>  	dma_desc->ctl1 = cpu_to_le32(ctl1);
> +
> +	ring->end = (desc_idx + 1) % BGMAC_RX_RING_SLOTS;

Given the fact that previous driver kind of worked, have you tried not
doing the modulo at all here ?

ring->end = desc_idx + 1;

It looks like the hardware accepted setting BGMAC_DMA_RX_INDEX to
ring->index_base + ring->num_slots * sizeof(struct bgmac_dma_desc)

So apparently the presence of BGMAC_DESC_CTL0_EOT in last rx desc was
enough to avoid any problem with this.

Thanks


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Felix Fietkau April 12, 2015, 6:46 p.m. UTC | #2
On 2015-04-12 20:09, Eric Dumazet wrote:
> On Sun, 2015-04-12 at 16:22 +0200, Felix Fietkau wrote:
>> The driver needs to inform the hardware about the first invalid (not yet
>> filled) rx slot, by writing its DMA descriptor pointer offset to the
>> BGMAC_DMA_RX_INDEX register.
>> 
>> This register was set to a value exceeding the rx ring size, effectively
>> allowing the hardware constant access to the full ring, regardless of
>> which slots are initialized.
>> 
>> Fix this by updating the register in bgmac_dma_rx_setup_desc.
>> 
>> Signed-off-by: Felix Fietkau <nbd@openwrt.org>
>> ---
>>  drivers/net/ethernet/broadcom/bgmac.c | 27 ++++++++++++++++++---------
>>  1 file changed, 18 insertions(+), 9 deletions(-)
>> 
>> diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
>> index e332de8..67993d7 100644
>> --- a/drivers/net/ethernet/broadcom/bgmac.c
>> +++ b/drivers/net/ethernet/broadcom/bgmac.c
>> @@ -362,6 +362,16 @@ static int bgmac_dma_rx_skb_for_slot(struct bgmac *bgmac,
>>  	return 0;
>>  }
>>  
>> +static void bgmac_dma_rx_update_index(struct bgmac *bgmac,
>> +				      struct bgmac_dma_ring *ring)
>> +{
>> +	dma_wmb();
>> +
>> +	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_INDEX,
>> +		    ring->index_base +
>> +		    ring->end * sizeof(struct bgmac_dma_desc));
>> +}
>> +
>>  static void bgmac_dma_rx_setup_desc(struct bgmac *bgmac,
>>  				    struct bgmac_dma_ring *ring, int desc_idx)
>>  {
>> @@ -380,6 +390,8 @@ static void bgmac_dma_rx_setup_desc(struct bgmac *bgmac,
>>  	dma_desc->addr_high = cpu_to_le32(upper_32_bits(ring->slots[desc_idx].dma_addr));
>>  	dma_desc->ctl0 = cpu_to_le32(ctl0);
>>  	dma_desc->ctl1 = cpu_to_le32(ctl1);
>> +
>> +	ring->end = (desc_idx + 1) % BGMAC_RX_RING_SLOTS;
> 
> Given the fact that previous driver kind of worked, have you tried not
> doing the modulo at all here ?
The previous driver was causing reproducible DMA corruption issues under
load. My patch fixes this, and I've verified this with hours of stress
testing (previously I could reproduce the issue in minutes).

> ring->end = desc_idx + 1;
> 
> It looks like the hardware accepted setting BGMAC_DMA_RX_INDEX to
> ring->index_base + ring->num_slots * sizeof(struct bgmac_dma_desc)
The hardware accepts it, but with that value, it considers all rx slot
entries filled, there's no CPU ownership bit in the descriptor.
According to documentation, that register indicates the first *invalid*
rx descriptor (i.e. one where the CPU hasn't posted a valid buffer yet).

> So apparently the presence of BGMAC_DESC_CTL0_EOT in last rx desc was
> enough to avoid any problem with this.
EOT serves a completely different purpose. It simply indicates the end
of the ring, telling the hardware to go back to the beginning for the
next descriptor.

- Felix
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet April 12, 2015, 7:04 p.m. UTC | #3
On Sun, 2015-04-12 at 20:46 +0200, Felix Fietkau wrote:
> On 2015-04-12 20:09, Eric Dumazet wrote:

> > 
> > Given the fact that previous driver kind of worked, have you tried not
> > doing the modulo at all here ?
> The previous driver was causing reproducible DMA corruption issues under
> load. My patch fixes this, and I've verified this with hours of stress
> testing (previously I could reproduce the issue in minutes).
> 
> > ring->end = desc_idx + 1;
> > 
> > It looks like the hardware accepted setting BGMAC_DMA_RX_INDEX to
> > ring->index_base + ring->num_slots * sizeof(struct bgmac_dma_desc)
> The hardware accepts it, but with that value, it considers all rx slot
> entries filled, there's no CPU ownership bit in the descriptor.
> According to documentation, that register indicates the first *invalid*
> rx descriptor (i.e. one where the CPU hasn't posted a valid buffer yet).
> 
> > So apparently the presence of BGMAC_DESC_CTL0_EOT in last rx desc was
> > enough to avoid any problem with this.
> EOT serves a completely different purpose. It simply indicates the end
> of the ring, telling the hardware to go back to the beginning for the
> next descriptor.

Right, but after your patch, we program the hardware with end == start
== 0

So I do not really understand how the hardware can still deliver first
frame.

If it was really taking care of 'end' value, then it should not even
deliver one frame.

The dma corruption happened because hardware was simply reusing frames,
that driver already took to upper stack, since as you said, there is no
ownership bit.

You might try your patch with BGMAC_RX_RING_SLOTS = 3 and check if it is
still working properly.

I find BGMAC_RX_RING_SLOTS==511 very suspect, and a comment of why this
magic value is not 512 would be nice.


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Felix Fietkau April 12, 2015, 7:25 p.m. UTC | #4
On 2015-04-12 21:04, Eric Dumazet wrote:
> On Sun, 2015-04-12 at 20:46 +0200, Felix Fietkau wrote:
>> On 2015-04-12 20:09, Eric Dumazet wrote:
> 
>> > 
>> > Given the fact that previous driver kind of worked, have you tried not
>> > doing the modulo at all here ?
>> The previous driver was causing reproducible DMA corruption issues under
>> load. My patch fixes this, and I've verified this with hours of stress
>> testing (previously I could reproduce the issue in minutes).
>> 
>> > ring->end = desc_idx + 1;
>> > 
>> > It looks like the hardware accepted setting BGMAC_DMA_RX_INDEX to
>> > ring->index_base + ring->num_slots * sizeof(struct bgmac_dma_desc)
>> The hardware accepts it, but with that value, it considers all rx slot
>> entries filled, there's no CPU ownership bit in the descriptor.
>> According to documentation, that register indicates the first *invalid*
>> rx descriptor (i.e. one where the CPU hasn't posted a valid buffer yet).
>> 
>> > So apparently the presence of BGMAC_DESC_CTL0_EOT in last rx desc was
>> > enough to avoid any problem with this.
>> EOT serves a completely different purpose. It simply indicates the end
>> of the ring, telling the hardware to go back to the beginning for the
>> next descriptor.
> 
> Right, but after your patch, we program the hardware with end == start
> == 0
> 
> So I do not really understand how the hardware can still deliver first
> frame.
> 
> If it was really taking care of 'end' value, then it should not even
> deliver one frame.
Good point.

> The dma corruption happened because hardware was simply reusing frames,
> that driver already took to upper stack, since as you said, there is no
> ownership bit.
> 
> You might try your patch with BGMAC_RX_RING_SLOTS = 3 and check if it is
> still working properly.
> 
> I find BGMAC_RX_RING_SLOTS==511 very suspect, and a comment of why this
> magic value is not 512 would be nice.
I think it's probably a valid idea that was implemented in the wrong
way. I'm pretty sure the idea was to only fill 511 descriptors in a ring
with a size of 512. That would eliminate the issue you mentioned above.
I'll send v3 with an extra patch to take care of this.

- Felix
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rafał Miłecki April 12, 2015, 8:29 p.m. UTC | #5
On 12 April 2015 at 21:25, Felix Fietkau <nbd@openwrt.org> wrote:
> On 2015-04-12 21:04, Eric Dumazet wrote:
>> On Sun, 2015-04-12 at 20:46 +0200, Felix Fietkau wrote:
>>> On 2015-04-12 20:09, Eric Dumazet wrote:
>>
>>> >
>>> > Given the fact that previous driver kind of worked, have you tried not
>>> > doing the modulo at all here ?
>>> The previous driver was causing reproducible DMA corruption issues under
>>> load. My patch fixes this, and I've verified this with hours of stress
>>> testing (previously I could reproduce the issue in minutes).
>>>
>>> > ring->end = desc_idx + 1;
>>> >
>>> > It looks like the hardware accepted setting BGMAC_DMA_RX_INDEX to
>>> > ring->index_base + ring->num_slots * sizeof(struct bgmac_dma_desc)
>>> The hardware accepts it, but with that value, it considers all rx slot
>>> entries filled, there's no CPU ownership bit in the descriptor.
>>> According to documentation, that register indicates the first *invalid*
>>> rx descriptor (i.e. one where the CPU hasn't posted a valid buffer yet).
>>>
>>> > So apparently the presence of BGMAC_DESC_CTL0_EOT in last rx desc was
>>> > enough to avoid any problem with this.
>>> EOT serves a completely different purpose. It simply indicates the end
>>> of the ring, telling the hardware to go back to the beginning for the
>>> next descriptor.
>>
>> Right, but after your patch, we program the hardware with end == start
>> == 0
>>
>> So I do not really understand how the hardware can still deliver first
>> frame.
>>
>> If it was really taking care of 'end' value, then it should not even
>> deliver one frame.
> Good point.
>
>> The dma corruption happened because hardware was simply reusing frames,
>> that driver already took to upper stack, since as you said, there is no
>> ownership bit.
>>
>> You might try your patch with BGMAC_RX_RING_SLOTS = 3 and check if it is
>> still working properly.
>>
>> I find BGMAC_RX_RING_SLOTS==511 very suspect, and a comment of why this
>> magic value is not 512 would be nice.
> I think it's probably a valid idea that was implemented in the wrong
> way. I'm pretty sure the idea was to only fill 511 descriptors in a ring
> with a size of 512. That would eliminate the issue you mentioned above.
> I'll send v3 with an extra patch to take care of this.

I don't recall differences between Broadcom's code and bgmac now.
However it could be that Broadcom used BGMAC_RX_RING_SLOTS==511 for
the same/similar purpose I did following:
/* Always keep one slot free to allow detecting bugged calls. */
if (--free_slots == 1)
        netif_stop_queue(net_dev);
Felix Fietkau April 12, 2015, 9:38 p.m. UTC | #6
On 2015-04-12 21:04, Eric Dumazet wrote:
> On Sun, 2015-04-12 at 20:46 +0200, Felix Fietkau wrote:
>> On 2015-04-12 20:09, Eric Dumazet wrote:
> 
>> > 
>> > Given the fact that previous driver kind of worked, have you tried not
>> > doing the modulo at all here ?
>> The previous driver was causing reproducible DMA corruption issues under
>> load. My patch fixes this, and I've verified this with hours of stress
>> testing (previously I could reproduce the issue in minutes).
>> 
>> > ring->end = desc_idx + 1;
>> > 
>> > It looks like the hardware accepted setting BGMAC_DMA_RX_INDEX to
>> > ring->index_base + ring->num_slots * sizeof(struct bgmac_dma_desc)
>> The hardware accepts it, but with that value, it considers all rx slot
>> entries filled, there's no CPU ownership bit in the descriptor.
>> According to documentation, that register indicates the first *invalid*
>> rx descriptor (i.e. one where the CPU hasn't posted a valid buffer yet).
>> 
>> > So apparently the presence of BGMAC_DESC_CTL0_EOT in last rx desc was
>> > enough to avoid any problem with this.
>> EOT serves a completely different purpose. It simply indicates the end
>> of the ring, telling the hardware to go back to the beginning for the
>> next descriptor.
> 
> Right, but after your patch, we program the hardware with end == start
> == 0
> 
> So I do not really understand how the hardware can still deliver first
> frame.
> 
> If it was really taking care of 'end' value, then it should not even
> deliver one frame.
> 
> The dma corruption happened because hardware was simply reusing frames,
> that driver already took to upper stack, since as you said, there is no
> ownership bit.
> 
> You might try your patch with BGMAC_RX_RING_SLOTS = 3 and check if it is
> still working properly.
> 
> I find BGMAC_RX_RING_SLOTS==511 very suspect, and a comment of why this
> magic value is not 512 would be nice.
I completely reworked the code and simplified handling
BGMAC_DMA_RX_INDEX by setting it to the last filled rx slot (keeping the
entire ring filled). I now get better performance and it stays reliable
even with smaller rings. Sending v3 now.

- Felix
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index e332de8..67993d7 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -362,6 +362,16 @@  static int bgmac_dma_rx_skb_for_slot(struct bgmac *bgmac,
 	return 0;
 }
 
+static void bgmac_dma_rx_update_index(struct bgmac *bgmac,
+				      struct bgmac_dma_ring *ring)
+{
+	dma_wmb();
+
+	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_INDEX,
+		    ring->index_base +
+		    ring->end * sizeof(struct bgmac_dma_desc));
+}
+
 static void bgmac_dma_rx_setup_desc(struct bgmac *bgmac,
 				    struct bgmac_dma_ring *ring, int desc_idx)
 {
@@ -380,6 +390,8 @@  static void bgmac_dma_rx_setup_desc(struct bgmac *bgmac,
 	dma_desc->addr_high = cpu_to_le32(upper_32_bits(ring->slots[desc_idx].dma_addr));
 	dma_desc->ctl0 = cpu_to_le32(ctl0);
 	dma_desc->ctl1 = cpu_to_le32(ctl1);
+
+	ring->end = (desc_idx + 1) % BGMAC_RX_RING_SLOTS;
 }
 
 static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
@@ -394,9 +406,7 @@  static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
 	end_slot &= BGMAC_DMA_RX_STATDPTR;
 	end_slot /= sizeof(struct bgmac_dma_desc);
 
-	ring->end = end_slot;
-
-	while (ring->start != ring->end) {
+	while (ring->start != end_slot) {
 		struct device *dma_dev = bgmac->core->dma_dev;
 		struct bgmac_slot_info *slot = &ring->slots[ring->start];
 		struct bgmac_rx_header *rx = slot->buf + BGMAC_RX_BUF_OFFSET;
@@ -468,6 +478,8 @@  static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
 			break;
 	}
 
+	bgmac_dma_rx_update_index(bgmac, ring);
+
 	return handled;
 }
 
@@ -690,15 +702,12 @@  static void bgmac_dma_init(struct bgmac *bgmac)
 		if (ring->unaligned)
 			bgmac_dma_rx_enable(bgmac, ring);
 
+		ring->start = 0;
+		ring->end = 0;
 		for (j = 0; j < ring->num_slots; j++)
 			bgmac_dma_rx_setup_desc(bgmac, ring, j);
 
-		bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_INDEX,
-			    ring->index_base +
-			    ring->num_slots * sizeof(struct bgmac_dma_desc));
-
-		ring->start = 0;
-		ring->end = 0;
+		bgmac_dma_rx_update_index(bgmac, ring);
 	}
 }