diff mbox

[v4,1/9] bgmac: simplify tx ring index handling

Message ID 1428933162-26763-1-git-send-email-nbd@openwrt.org
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Felix Fietkau April 13, 2015, 1:52 p.m. UTC
Keep incrementing ring->start and ring->end instead of pointing it to
the actual ring slot entry. This simplifies the calculation of the
number of free slots.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
---
 drivers/net/ethernet/broadcom/bgmac.c | 44 ++++++++++++++---------------------
 drivers/net/ethernet/broadcom/bgmac.h |  6 ++---
 2 files changed, 20 insertions(+), 30 deletions(-)

Comments

Rafał Miłecki April 13, 2015, 2:21 p.m. UTC | #1
On 13 April 2015 at 15:52, Felix Fietkau <nbd@openwrt.org> wrote:
> Keep incrementing ring->start and ring->end instead of pointing it to
> the actual ring slot entry. This simplifies the calculation of the
> number of free slots.

You're much more experienced in network stuff, so it's likely me not
understanding some stuff & needing help.


> @@ -158,13 +157,7 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
>                 skb_checksum_help(skb);
>
>         nr_frags = skb_shinfo(skb)->nr_frags;
> -
> -       if (ring->start <= ring->end)
> -               free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
> -       else
> -               free_slots = ring->start - ring->end;
> -
> -       if (free_slots <= nr_frags + 1) {
> +       if (ring->end - ring->start + nr_frags + 1 >= BGMAC_TX_RING_SLOTS) {
>                 bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
>                 netif_stop_queue(net_dev);
>                 return NETDEV_TX_BUSY;

How is this going to work with ring->end lower than ring->start? Let's
say you have 2 empty slots at the end of ring and 2 empty slots at the
beginning. In total 4 free slots. Doing ring->end - ring->start will
give you some big negative number (depending on the ring size), won't
it?


> @@ -284,10 +276,8 @@ static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
>                         slot->skb = NULL;
>                 }
>
> -next:
>                 slot->dma_addr = 0;
> -               if (++ring->start >= BGMAC_TX_RING_SLOTS)
> -                       ring->start = 0;
> +               ring->start++;
>                 freed = true;
>         }
>

Do I understand correctly you're using u32 overflow here? Is this
OK/allowed in kernel to knowingly use overflows?


> diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h
> index 3ad965f..5a198d5 100644
> --- a/drivers/net/ethernet/broadcom/bgmac.h
> +++ b/drivers/net/ethernet/broadcom/bgmac.h
> @@ -414,10 +414,10 @@ enum bgmac_dma_ring_type {
>   * empty.
>   */
>  struct bgmac_dma_ring {
> -       u16 num_slots;
> -       u16 start;
> -       u16 end;
> +       u32 start;
> +       u32 end;
>
> +       u16 num_slots;
>         u16 mmio_base;
>         struct bgmac_dma_desc *cpu_base;
>         dma_addr_t dma_base;

Any reason for u32 instead of u16?
Felix Fietkau April 13, 2015, 3:01 p.m. UTC | #2
On 2015-04-13 16:21, Rafał Miłecki wrote:
>> @@ -158,13 +157,7 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
>>                 skb_checksum_help(skb);
>>
>>         nr_frags = skb_shinfo(skb)->nr_frags;
>> -
>> -       if (ring->start <= ring->end)
>> -               free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
>> -       else
>> -               free_slots = ring->start - ring->end;
>> -
>> -       if (free_slots <= nr_frags + 1) {
>> +       if (ring->end - ring->start + nr_frags + 1 >= BGMAC_TX_RING_SLOTS) {
>>                 bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
>>                 netif_stop_queue(net_dev);
>>                 return NETDEV_TX_BUSY;
> 
> How is this going to work with ring->end lower than ring->start? Let's
> say you have 2 empty slots at the end of ring and 2 empty slots at the
> beginning. In total 4 free slots. Doing ring->end - ring->start will
> give you some big negative number (depending on the ring size), won't
> it?
It won't, because the resulting data type is limited to 32 bit. I'm
pretty sure there are other places in the kernel that rely on the same
behavior.

>> @@ -284,10 +276,8 @@ static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
>>                         slot->skb = NULL;
>>                 }
>>
>> -next:
>>                 slot->dma_addr = 0;
>> -               if (++ring->start >= BGMAC_TX_RING_SLOTS)
>> -                       ring->start = 0;
>> +               ring->start++;
>>                 freed = true;
>>         }
>>
> 
> Do I understand correctly you're using u32 overflow here? Is this
> OK/allowed in kernel to knowingly use overflows?
I think so.

>> diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h
>> index 3ad965f..5a198d5 100644
>> --- a/drivers/net/ethernet/broadcom/bgmac.h
>> +++ b/drivers/net/ethernet/broadcom/bgmac.h
>> @@ -414,10 +414,10 @@ enum bgmac_dma_ring_type {
>>   * empty.
>>   */
>>  struct bgmac_dma_ring {
>> -       u16 num_slots;
>> -       u16 start;
>> -       u16 end;
>> +       u32 start;
>> +       u32 end;
>>
>> +       u16 num_slots;
>>         u16 mmio_base;
>>         struct bgmac_dma_desc *cpu_base;
>>         dma_addr_t dma_base;
> 
> Any reason for u32 instead of u16?
To avoid writes touching other fields close to it.

- Felix
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rafał Miłecki April 13, 2015, 6:05 p.m. UTC | #3
On 13 April 2015 at 17:01, Felix Fietkau <nbd@openwrt.org> wrote:
> On 2015-04-13 16:21, Rafał Miłecki wrote:
>>> @@ -158,13 +157,7 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
>>>                 skb_checksum_help(skb);
>>>
>>>         nr_frags = skb_shinfo(skb)->nr_frags;
>>> -
>>> -       if (ring->start <= ring->end)
>>> -               free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
>>> -       else
>>> -               free_slots = ring->start - ring->end;
>>> -
>>> -       if (free_slots <= nr_frags + 1) {
>>> +       if (ring->end - ring->start + nr_frags + 1 >= BGMAC_TX_RING_SLOTS) {
>>>                 bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
>>>                 netif_stop_queue(net_dev);
>>>                 return NETDEV_TX_BUSY;
>>
>> How is this going to work with ring->end lower than ring->start? Let's
>> say you have 2 empty slots at the end of ring and 2 empty slots at the
>> beginning. In total 4 free slots. Doing ring->end - ring->start will
>> give you some big negative number (depending on the ring size), won't
>> it?
> It won't, because the resulting data type is limited to 32 bit. I'm
> pretty sure there are other places in the kernel that rely on the same
> behavior.

I'm not really sure about this.

I modified driver in the following way to test ring behavior better:
#define BGMAC_TX_RING_SLOTS 16
#define BGMAC_RX_RING_SLOTS 32
u8 start;
u8 end;

And then I added following code to bgmac.c for debugging:
if (ring->end < ring->start)
pr_info("ring->end:0x%08x ring->start:0x%02x (ring->end - ring->start
+ nr_frags + 1):%d\n", ring->end, ring->start, ring->end - ring->start
+ nr_frags + 1);

This is what I got:
[   35.516290] bgmac: ring->end:0x00000000 ring->start:0xfd (ring->end
- ring->start + nr_frags + 1):-252
[   58.058634] bgmac: ring->end:0x00000000 ring->start:0xff (ring->end
- ring->start + nr_frags + 1):-254
[   62.190323] bgmac: ring->end:0x00000000 ring->start:0xff (ring->end
- ring->start + nr_frags + 1):-254
[   71.659957] bgmac: ring->end:0x00000000 ring->start:0xff (ring->end
- ring->start + nr_frags + 1):-254
[   83.029964] bgmac: ring->end:0x00000000 ring->start:0xfe (ring->end
- ring->start + nr_frags + 1):-253
[   99.519693] bgmac: ring->end:0x00000000 ring->start:0xff (ring->end
- ring->start + nr_frags + 1):-254
[  105.751245] bgmac: ring->end:0x00000000 ring->start:0xfe (ring->end
- ring->start + nr_frags + 1):-253
[  139.006470] bgmac: ring->end:0x00000000 ring->start:0xfd (ring->end
- ring->start + nr_frags + 1):-252
[  143.496619] bgmac: ring->end:0x00000000 ring->start:0xfd (ring->end
- ring->start + nr_frags + 1):-252
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index fa8f9e1..73374b4 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -142,11 +142,10 @@  static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
 {
 	struct device *dma_dev = bgmac->core->dma_dev;
 	struct net_device *net_dev = bgmac->net_dev;
-	struct bgmac_slot_info *slot = &ring->slots[ring->end];
-	int free_slots;
+	int index = ring->end % BGMAC_TX_RING_SLOTS;
+	struct bgmac_slot_info *slot = &ring->slots[index];
 	int nr_frags;
 	u32 flags;
-	int index = ring->end;
 	int i;
 
 	if (skb->len > BGMAC_DESC_CTL1_LEN) {
@@ -158,13 +157,7 @@  static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
 		skb_checksum_help(skb);
 
 	nr_frags = skb_shinfo(skb)->nr_frags;
-
-	if (ring->start <= ring->end)
-		free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
-	else
-		free_slots = ring->start - ring->end;
-
-	if (free_slots <= nr_frags + 1) {
+	if (ring->end - ring->start + nr_frags + 1 >= BGMAC_TX_RING_SLOTS) {
 		bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
 		netif_stop_queue(net_dev);
 		return NETDEV_TX_BUSY;
@@ -200,7 +193,7 @@  static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
 	}
 
 	slot->skb = skb;
-
+	ring->end += nr_frags + 1;
 	netdev_sent_queue(net_dev, skb->len);
 
 	wmb();
@@ -208,13 +201,12 @@  static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
 	/* Increase ring->end to point empty slot. We tell hardware the first
 	 * slot it should *not* read.
 	 */
-	ring->end = (index + 1) % BGMAC_TX_RING_SLOTS;
 	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
 		    ring->index_base +
-		    ring->end * sizeof(struct bgmac_dma_desc));
+		    (ring->end % BGMAC_TX_RING_SLOTS) *
+		    sizeof(struct bgmac_dma_desc));
 
-	free_slots -= nr_frags + 1;
-	if (free_slots < 8)
+	if (ring->end - ring->start >= BGMAC_TX_RING_SLOTS - 8)
 		netif_stop_queue(net_dev);
 
 	return NETDEV_TX_OK;
@@ -256,17 +248,17 @@  static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
 	empty_slot &= BGMAC_DMA_TX_STATDPTR;
 	empty_slot /= sizeof(struct bgmac_dma_desc);
 
-	while (ring->start != empty_slot) {
-		struct bgmac_slot_info *slot = &ring->slots[ring->start];
-		u32 ctl1 = le32_to_cpu(ring->cpu_base[ring->start].ctl1);
-		int len = ctl1 & BGMAC_DESC_CTL1_LEN;
+	while (ring->start != ring->end) {
+		int slot_idx = ring->start % BGMAC_TX_RING_SLOTS;
+		struct bgmac_slot_info *slot = &ring->slots[slot_idx];
+		u32 ctl1;
+		int len;
 
-		if (!slot->dma_addr) {
-			bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
-				  ring->start, ring->end);
-			goto next;
-		}
+		if (slot_idx == empty_slot)
+			break;
 
+		ctl1 = le32_to_cpu(ring->cpu_base[slot_idx].ctl1);
+		len = ctl1 & BGMAC_DESC_CTL1_LEN;
 		if (ctl1 & BGMAC_DESC_CTL0_SOF)
 			/* Unmap no longer used buffer */
 			dma_unmap_single(dma_dev, slot->dma_addr, len,
@@ -284,10 +276,8 @@  static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
 			slot->skb = NULL;
 		}
 
-next:
 		slot->dma_addr = 0;
-		if (++ring->start >= BGMAC_TX_RING_SLOTS)
-			ring->start = 0;
+		ring->start++;
 		freed = true;
 	}
 
diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h
index 3ad965f..5a198d5 100644
--- a/drivers/net/ethernet/broadcom/bgmac.h
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -414,10 +414,10 @@  enum bgmac_dma_ring_type {
  * empty.
  */
 struct bgmac_dma_ring {
-	u16 num_slots;
-	u16 start;
-	u16 end;
+	u32 start;
+	u32 end;
 
+	u16 num_slots;
 	u16 mmio_base;
 	struct bgmac_dma_desc *cpu_base;
 	dma_addr_t dma_base;