[net-next,v3,3/4] gve: Rx Buffer Recycling

Message ID	20200924010104.3196839-4-awogbemila@google.com
State	Changes Requested
Delegated to:	David Miller
Headers	show Return-Path: <netdev-owner@vger.kernel.org> Sender: "awogbemila via sendgmr" <awogbemila@awogbemila.sea.corp.google.com> Date: Wed, 23 Sep 2020 18:01:03 -0700 In-Reply-To: <20200924010104.3196839-1-awogbemila@google.com> Message-Id: <20200924010104.3196839-4-awogbemila@google.com> Mime-Version: 1.0 References: <20200924010104.3196839-1-awogbemila@google.com> Subject: [PATCH net-next v3 3/4] gve: Rx Buffer Recycling From: David Awogbemila <awogbemila@google.com> To: netdev@vger.kernel.org Cc: David Awogbemila <awogbemila@google.com> Content-Type: text/plain; charset="UTF-8" Precedence: bulk
Series	GVE Raw Addressing. \| expand [net-next,v3,0/4] GVE Raw Addressing. [net-next,v3,1/4] gve: Add support for raw addressing device option [net-next,v3,2/4] gve: Add support for raw addressing to the rx path [net-next,v3,3/4] gve: Rx Buffer Recycling [net-next,v3,4/4] gve: Add support for raw addressing in the tx path

Message ID

20200924010104.3196839-4-awogbemila@google.com

State

Changes Requested

Delegated to:

David Miller

Headers

Sender: "awogbemila via sendgmr" <awogbemila@awogbemila.sea.corp.google.com>
Date: Wed, 23 Sep 2020 18:01:03 -0700
In-Reply-To: <20200924010104.3196839-1-awogbemila@google.com>
Message-Id: <20200924010104.3196839-4-awogbemila@google.com>
Mime-Version: 1.0
References: <20200924010104.3196839-1-awogbemila@google.com>
Subject: [PATCH net-next v3 3/4] gve: Rx Buffer Recycling
From: David Awogbemila <awogbemila@google.com>
To: netdev@vger.kernel.org
Cc: David Awogbemila <awogbemila@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk

Series

GVE Raw Addressing. | expand

Commit Message

David Awogbemila Sept. 24, 2020, 1:01 a.m. UTC

This patch lets the driver reuse buffers that have been freed by the
networking stack.

In the raw addressing case, this allows the driver avoid allocating new
buffers.
In the qpl case, the driver can avoid copies.

Signed-off-by: David Awogbemila <awogbemila@google.com>
---
 drivers/net/ethernet/google/gve/gve.h    |  10 +-
 drivers/net/ethernet/google/gve/gve_rx.c | 197 +++++++++++++++--------
 2 files changed, 133 insertions(+), 74 deletions(-)

Comments

Jakub Kicinski Sept. 24, 2020, 11 p.m. UTC | #1

On Wed, 23 Sep 2020 18:01:03 -0700 David Awogbemila wrote:
> This patch lets the driver reuse buffers that have been freed by the
> networking stack.
> 
> In the raw addressing case, this allows the driver avoid allocating new
> buffers.
> In the qpl case, the driver can avoid copies.
> 
> Signed-off-by: David Awogbemila <awogbemila@google.com>
> ---
>  drivers/net/ethernet/google/gve/gve.h    |  10 +-
>  drivers/net/ethernet/google/gve/gve_rx.c | 197 +++++++++++++++--------
>  2 files changed, 133 insertions(+), 74 deletions(-)
> 
> diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
> index b853efb0b17f..9cce2b356235 100644
> --- a/drivers/net/ethernet/google/gve/gve.h
> +++ b/drivers/net/ethernet/google/gve/gve.h
> @@ -50,6 +50,7 @@ struct gve_rx_slot_page_info {
>  	struct page *page;
>  	void *page_address;
>  	u32 page_offset; /* offset to write to in page */
> +	bool can_flip;
>  };
>  
>  /* A list of pages registered with the device during setup and used by a queue
> @@ -505,15 +506,6 @@ static inline enum dma_data_direction gve_qpl_dma_dir(struct gve_priv *priv,
>  		return DMA_FROM_DEVICE;
>  }
>  
> -/* Returns true if the max mtu allows page recycling */
> -static inline bool gve_can_recycle_pages(struct net_device *dev)
> -{
> -	/* We can't recycle the pages if we can't fit a packet into half a
> -	 * page.
> -	 */
> -	return dev->max_mtu <= PAGE_SIZE / 2;
> -}
> -
>  /* buffers */
>  int gve_alloc_page(struct gve_priv *priv, struct device *dev,
>  		   struct page **page, dma_addr_t *dma,
> diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
> index ae76d2547d13..bea483db28f5 100644
> --- a/drivers/net/ethernet/google/gve/gve_rx.c
> +++ b/drivers/net/ethernet/google/gve/gve_rx.c
> @@ -263,8 +263,7 @@ static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
>  	return PKT_HASH_TYPE_L2;
>  }
>  
> -static struct sk_buff *gve_rx_copy(struct gve_rx_ring *rx,
> -				   struct net_device *dev,
> +static struct sk_buff *gve_rx_copy(struct net_device *dev,
>  				   struct napi_struct *napi,
>  				   struct gve_rx_slot_page_info *page_info,
>  				   u16 len)
> @@ -282,10 +281,6 @@ static struct sk_buff *gve_rx_copy(struct gve_rx_ring *rx,
>  
>  	skb->protocol = eth_type_trans(skb, dev);
>  
> -	u64_stats_update_begin(&rx->statss);
> -	rx->rx_copied_pkt++;
> -	u64_stats_update_end(&rx->statss);
> -
>  	return skb;
>  }
>  
> @@ -331,22 +326,91 @@ static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info,
>  {
>  	u64 addr = be64_to_cpu(data_ring->addr);
>  
> +	/* "flip" to other packet buffer on this page */
>  	page_info->page_offset ^= PAGE_SIZE / 2;
>  	addr ^= PAGE_SIZE / 2;
>  	data_ring->addr = cpu_to_be64(addr);
>  }
>  
> +static bool gve_rx_can_flip_buffers(struct net_device *netdev)
> +{
> +#if PAGE_SIZE == 4096
> +	/* We can't flip a buffer if we can't fit a packet
> +	 * into half a page.
> +	 */
> +	if (netdev->max_mtu + GVE_RX_PAD + ETH_HLEN  > PAGE_SIZE / 2)

double space

> +		return false;
> +	return true;

Flip the condition and just return it.

return netdev->max_mtu + GVE_RX_PAD + ETH_HLEN <= PAGE_SIZE / 2

Also you should probably look at mtu not max_mtu. More likely to be in
cache.

> +#else
> +	/* PAGE_SIZE != 4096 - don't try to reuse */
> +	return false;
> +#endif
> +}
> +
> +static int gve_rx_can_recycle_buffer(struct page *page)
> +{
> +	int pagecount = page_count(page);
> +
> +	/* This page is not being used by any SKBs - reuse */
> +	if (pagecount == 1) {
> +		return 1;
> +	/* This page is still being used by an SKB - we can't reuse */
> +	} else if (pagecount >= 2) {
> +		return 0;
> +	}

parenthesis not necessary around single line statements.

> +	WARN(pagecount < 1, "Pagecount should never be < 1");
> +	return -1;
> +}
> +
>  static struct sk_buff *
>  gve_rx_raw_addressing(struct device *dev, struct net_device *netdev,
>  		      struct gve_rx_slot_page_info *page_info, u16 len,
>  		      struct napi_struct *napi,
> -		      struct gve_rx_data_slot *data_slot)
> +		      struct gve_rx_data_slot *data_slot, bool can_flip)
>  {
>  	struct sk_buff *skb = gve_rx_add_frags(napi, page_info, len);

IMHO it looks weird when a function is called on variable init and then
error checking is done after an empty line.

>  	if (!skb)
>  		return NULL;
>  
> +	/* Optimistically stop the kernel from freeing the page by increasing
> +	 * the page bias. We will check the refcount in refill to determine if
> +	 * we need to alloc a new page.
> +	 */
> +	get_page(page_info->page);
> +	page_info->can_flip = can_flip;
> +
> +	return skb;
> +}
> +
> +static struct sk_buff *
> +gve_rx_qpl(struct device *dev, struct net_device *netdev,
> +	   struct gve_rx_ring *rx, struct gve_rx_slot_page_info *page_info,
> +	   u16 len, struct napi_struct *napi,
> +	   struct gve_rx_data_slot *data_slot, bool recycle)
> +{
> +	struct sk_buff *skb;

empty line here

> +	/* if raw_addressing mode is not enabled gvnic can only receive into
> +	 * registered segmens. If the buffer can't be recycled, our only

segments?

> +	 * choice is to copy the data out of it so that we can return it to the
> +	 * device.
> +	 */
> +	if (recycle) {
> +		skb = gve_rx_add_frags(napi, page_info, len);
> +		/* No point in recycling if we didn't get the skb */
> +		if (skb) {
> +			/* Make sure the networking stack can't free the page */
> +			get_page(page_info->page);
> +			gve_rx_flip_buff(page_info, data_slot);
> +		}
> +	} else {
> +		skb = gve_rx_copy(netdev, napi, page_info, len);
> +		if (skb) {
> +			u64_stats_update_begin(&rx->statss);
> +			rx->rx_copied_pkt++;
> +			u64_stats_update_end(&rx->statss);
> +		}
> +	}
>  	return skb;
>  }

> @@ -490,14 +525,46 @@ static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
>  
>  	while ((fill_cnt & rx->mask) != (rx->cnt & rx->mask)) {
>  		u32 idx = fill_cnt & rx->mask;
> -		struct gve_rx_slot_page_info *page_info = &rx->data.page_info[idx];
> -		struct gve_rx_data_slot *data_slot = &rx->data.data_ring[idx];
> -		struct device *dev = &priv->pdev->dev;
> +		struct gve_rx_slot_page_info *page_info =
> +						&rx->data.page_info[idx];
>  
> -		gve_rx_free_buffer(dev, page_info, data_slot);
> -		page_info->page = NULL;
> -		if (gve_rx_alloc_buffer(priv, dev, page_info, data_slot, rx))
> -			break;
> +		if (page_info->can_flip) {
> +			/* The other half of the page is free because it was
> +			 * free when we processed the descriptor. Flip to it.
> +			 */
> +			struct gve_rx_data_slot *data_slot =
> +						&rx->data.data_ring[idx];
> +
> +			gve_rx_flip_buff(page_info, data_slot);
> +			page_info->can_flip = false;
> +		} else {
> +			/* It is possible that the networking stack has already
> +			 * finished processing all outstanding packets in the buffer
> +			 * and it can be reused.
> +			 * Flipping is unnecessary here - if the networking stack still
> +			 * owns half the page it is impossible to tell which half. Either
> +			 * the whole page is free or it needs to be replaced.
> +			 */
> +			int recycle = gve_rx_can_recycle_buffer(page_info->page);
> +
> +			if (recycle < 0) {
> +				gve_schedule_reset(priv);
> +				return false;
> +			}
> +			if (!recycle) {
> +				/* We can't reuse the buffer - alloc a new one*/
> +				struct gve_rx_data_slot *data_slot =
> +						&rx->data.data_ring[idx];
> +				struct device *dev = &priv->pdev->dev;
> +
> +				gve_rx_free_buffer(dev, page_info, data_slot);
> +				page_info->page = NULL;
> +				if (gve_rx_alloc_buffer(priv, dev, page_info,
> +							data_slot, rx)) {
> +					break;

What if the queue runs completely dry during memory shortage? 
You need some form of delayed work to periodically refill 
the free buffers, right?

> +				}

parens unnecessary

> +			}
> +		}
>  		fill_cnt++;
>  	}
>  	rx->fill_cnt = fill_cnt;

David Awogbemila Sept. 30, 2020, 4:10 p.m. UTC | #2

On Thu, Sep 24, 2020 at 4:01 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Wed, 23 Sep 2020 18:01:03 -0700 David Awogbemila wrote:
> > This patch lets the driver reuse buffers that have been freed by the
> > networking stack.
> >
> > In the raw addressing case, this allows the driver avoid allocating new
> > buffers.
> > In the qpl case, the driver can avoid copies.
> >
> > Signed-off-by: David Awogbemila <awogbemila@google.com>
> > ---
> >  drivers/net/ethernet/google/gve/gve.h    |  10 +-
> >  drivers/net/ethernet/google/gve/gve_rx.c | 197 +++++++++++++++--------
> >  2 files changed, 133 insertions(+), 74 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
> > index b853efb0b17f..9cce2b356235 100644
> > --- a/drivers/net/ethernet/google/gve/gve.h
> > +++ b/drivers/net/ethernet/google/gve/gve.h
> > @@ -50,6 +50,7 @@ struct gve_rx_slot_page_info {
> >       struct page *page;
> >       void *page_address;
> >       u32 page_offset; /* offset to write to in page */
> > +     bool can_flip;
> >  };
> >
> >  /* A list of pages registered with the device during setup and used by a queue
> > @@ -505,15 +506,6 @@ static inline enum dma_data_direction gve_qpl_dma_dir(struct gve_priv *priv,
> >               return DMA_FROM_DEVICE;
> >  }
> >
> > -/* Returns true if the max mtu allows page recycling */
> > -static inline bool gve_can_recycle_pages(struct net_device *dev)
> > -{
> > -     /* We can't recycle the pages if we can't fit a packet into half a
> > -      * page.
> > -      */
> > -     return dev->max_mtu <= PAGE_SIZE / 2;
> > -}
> > -
> >  /* buffers */
> >  int gve_alloc_page(struct gve_priv *priv, struct device *dev,
> >                  struct page **page, dma_addr_t *dma,
> > diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
> > index ae76d2547d13..bea483db28f5 100644
> > --- a/drivers/net/ethernet/google/gve/gve_rx.c
> > +++ b/drivers/net/ethernet/google/gve/gve_rx.c
> > @@ -263,8 +263,7 @@ static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
> >       return PKT_HASH_TYPE_L2;
> >  }
> >
> > -static struct sk_buff *gve_rx_copy(struct gve_rx_ring *rx,
> > -                                struct net_device *dev,
> > +static struct sk_buff *gve_rx_copy(struct net_device *dev,
> >                                  struct napi_struct *napi,
> >                                  struct gve_rx_slot_page_info *page_info,
> >                                  u16 len)
> > @@ -282,10 +281,6 @@ static struct sk_buff *gve_rx_copy(struct gve_rx_ring *rx,
> >
> >       skb->protocol = eth_type_trans(skb, dev);
> >
> > -     u64_stats_update_begin(&rx->statss);
> > -     rx->rx_copied_pkt++;
> > -     u64_stats_update_end(&rx->statss);
> > -
> >       return skb;
> >  }
> >
> > @@ -331,22 +326,91 @@ static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info,
> >  {
> >       u64 addr = be64_to_cpu(data_ring->addr);
> >
> > +     /* "flip" to other packet buffer on this page */
> >       page_info->page_offset ^= PAGE_SIZE / 2;
> >       addr ^= PAGE_SIZE / 2;
> >       data_ring->addr = cpu_to_be64(addr);
> >  }
> >
> > +static bool gve_rx_can_flip_buffers(struct net_device *netdev)
> > +{
> > +#if PAGE_SIZE == 4096
> > +     /* We can't flip a buffer if we can't fit a packet
> > +      * into half a page.
> > +      */
> > +     if (netdev->max_mtu + GVE_RX_PAD + ETH_HLEN  > PAGE_SIZE / 2)
>
> double space

I'll adjust this.

> > +             return false;
> > +     return true;
>
> Flip the condition and just return it.
>
> return netdev->max_mtu + GVE_RX_PAD + ETH_HLEN <= PAGE_SIZE / 2
>
> Also you should probably look at mtu not max_mtu. More likely to be in
> cache.

Ok, I'll adjust this to use mtu instead.

> > +#else
> > +     /* PAGE_SIZE != 4096 - don't try to reuse */
> > +     return false;
> > +#endif
> > +}
> > +
> > +static int gve_rx_can_recycle_buffer(struct page *page)
> > +{
> > +     int pagecount = page_count(page);
> > +
> > +     /* This page is not being used by any SKBs - reuse */
> > +     if (pagecount == 1) {
> > +             return 1;
> > +     /* This page is still being used by an SKB - we can't reuse */
> > +     } else if (pagecount >= 2) {
> > +             return 0;
> > +     }
>
> parenthesis not necessary around single line statements.

I'll adjust this.

> > +     WARN(pagecount < 1, "Pagecount should never be < 1");
> > +     return -1;
> > +}
> > +
> >  static struct sk_buff *
> >  gve_rx_raw_addressing(struct device *dev, struct net_device *netdev,
> >                     struct gve_rx_slot_page_info *page_info, u16 len,
> >                     struct napi_struct *napi,
> > -                   struct gve_rx_data_slot *data_slot)
> > +                   struct gve_rx_data_slot *data_slot, bool can_flip)
> >  {
> >       struct sk_buff *skb = gve_rx_add_frags(napi, page_info, len);
>
> IMHO it looks weird when a function is called on variable init and then
> error checking is done after an empty line.

Ok, I'll declare skb and then initialize it separately.

> >       if (!skb)
> >               return NULL;
> >
> > +     /* Optimistically stop the kernel from freeing the page by increasing
> > +      * the page bias. We will check the refcount in refill to determine if
> > +      * we need to alloc a new page.
> > +      */
> > +     get_page(page_info->page);
> > +     page_info->can_flip = can_flip;
> > +
> > +     return skb;
> > +}
> > +
> > +static struct sk_buff *
> > +gve_rx_qpl(struct device *dev, struct net_device *netdev,
> > +        struct gve_rx_ring *rx, struct gve_rx_slot_page_info *page_info,
> > +        u16 len, struct napi_struct *napi,
> > +        struct gve_rx_data_slot *data_slot, bool recycle)
> > +{
> > +     struct sk_buff *skb;
>
> empty line here

I'll adjust this.

> > +     /* if raw_addressing mode is not enabled gvnic can only receive into
> > +      * registered segmens. If the buffer can't be recycled, our only
>
> segments?

I'll correct this.

> > +      * choice is to copy the data out of it so that we can return it to the
> > +      * device.
> > +      */
> > +     if (recycle) {
> > +             skb = gve_rx_add_frags(napi, page_info, len);
> > +             /* No point in recycling if we didn't get the skb */
> > +             if (skb) {
> > +                     /* Make sure the networking stack can't free the page */
> > +                     get_page(page_info->page);
> > +                     gve_rx_flip_buff(page_info, data_slot);
> > +             }
> > +     } else {
> > +             skb = gve_rx_copy(netdev, napi, page_info, len);
> > +             if (skb) {
> > +                     u64_stats_update_begin(&rx->statss);
> > +                     rx->rx_copied_pkt++;
> > +                     u64_stats_update_end(&rx->statss);
> > +             }
> > +     }
> >       return skb;
> >  }
>
> > @@ -490,14 +525,46 @@ static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
> >
> >       while ((fill_cnt & rx->mask) != (rx->cnt & rx->mask)) {
> >               u32 idx = fill_cnt & rx->mask;
> > -             struct gve_rx_slot_page_info *page_info = &rx->data.page_info[idx];
> > -             struct gve_rx_data_slot *data_slot = &rx->data.data_ring[idx];
> > -             struct device *dev = &priv->pdev->dev;
> > +             struct gve_rx_slot_page_info *page_info =
> > +                                             &rx->data.page_info[idx];
> >
> > -             gve_rx_free_buffer(dev, page_info, data_slot);
> > -             page_info->page = NULL;
> > -             if (gve_rx_alloc_buffer(priv, dev, page_info, data_slot, rx))
> > -                     break;
> > +             if (page_info->can_flip) {
> > +                     /* The other half of the page is free because it was
> > +                      * free when we processed the descriptor. Flip to it.
> > +                      */
> > +                     struct gve_rx_data_slot *data_slot =
> > +                                             &rx->data.data_ring[idx];
> > +
> > +                     gve_rx_flip_buff(page_info, data_slot);
> > +                     page_info->can_flip = false;
> > +             } else {
> > +                     /* It is possible that the networking stack has already
> > +                      * finished processing all outstanding packets in the buffer
> > +                      * and it can be reused.
> > +                      * Flipping is unnecessary here - if the networking stack still
> > +                      * owns half the page it is impossible to tell which half. Either
> > +                      * the whole page is free or it needs to be replaced.
> > +                      */
> > +                     int recycle = gve_rx_can_recycle_buffer(page_info->page);
> > +
> > +                     if (recycle < 0) {
> > +                             gve_schedule_reset(priv);
> > +                             return false;
> > +                     }
> > +                     if (!recycle) {
> > +                             /* We can't reuse the buffer - alloc a new one*/
> > +                             struct gve_rx_data_slot *data_slot =
> > +                                             &rx->data.data_ring[idx];
> > +                             struct device *dev = &priv->pdev->dev;
> > +
> > +                             gve_rx_free_buffer(dev, page_info, data_slot);
> > +                             page_info->page = NULL;
> > +                             if (gve_rx_alloc_buffer(priv, dev, page_info,
> > +                                                     data_slot, rx)) {
> > +                                     break;
>
> What if the queue runs completely dry during memory shortage?
> You need some form of delayed work to periodically refill
> the free buffers, right?

Thanks, this looks like it will require modifications that will need
to be well tested.
Just for my own curiosity, how common of an occurrence are such memory
shortages?

>
> > +                             }
>
> parens unnecessary

I'll adjust this.

Jakub Kicinski Sept. 30, 2020, 9:23 p.m. UTC | #3

On Wed, 30 Sep 2020 09:10:02 -0700 David Awogbemila wrote:
> > What if the queue runs completely dry during memory shortage?
> > You need some form of delayed work to periodically refill
> > the free buffers, right?  
> 
> Thanks, this looks like it will require modifications that will need
> to be well tested.
> Just for my own curiosity, how common of an occurrence are such memory
> shortages?

Depends on the workload. For some apps they are very common.

diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index b853efb0b17f..9cce2b356235 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -50,6 +50,7 @@  struct gve_rx_slot_page_info {
 	struct page *page;
 	void *page_address;
 	u32 page_offset; /* offset to write to in page */
+	bool can_flip;
 };
 
 /* A list of pages registered with the device during setup and used by a queue
@@ -505,15 +506,6 @@  static inline enum dma_data_direction gve_qpl_dma_dir(struct gve_priv *priv,
 		return DMA_FROM_DEVICE;
 }
 
-/* Returns true if the max mtu allows page recycling */
-static inline bool gve_can_recycle_pages(struct net_device *dev)
-{
-	/* We can't recycle the pages if we can't fit a packet into half a
-	 * page.
-	 */
-	return dev->max_mtu <= PAGE_SIZE / 2;
-}
-
 /* buffers */
 int gve_alloc_page(struct gve_priv *priv, struct device *dev,
 		   struct page **page, dma_addr_t *dma,
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index ae76d2547d13..bea483db28f5 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -263,8 +263,7 @@  static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
 	return PKT_HASH_TYPE_L2;
 }
 
-static struct sk_buff *gve_rx_copy(struct gve_rx_ring *rx,
-				   struct net_device *dev,
+static struct sk_buff *gve_rx_copy(struct net_device *dev,
 				   struct napi_struct *napi,
 				   struct gve_rx_slot_page_info *page_info,
 				   u16 len)
@@ -282,10 +281,6 @@  static struct sk_buff *gve_rx_copy(struct gve_rx_ring *rx,
 
 	skb->protocol = eth_type_trans(skb, dev);
 
-	u64_stats_update_begin(&rx->statss);
-	rx->rx_copied_pkt++;
-	u64_stats_update_end(&rx->statss);
-
 	return skb;
 }
 
@@ -331,22 +326,91 @@  static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info,
 {
 	u64 addr = be64_to_cpu(data_ring->addr);
 
+	/* "flip" to other packet buffer on this page */
 	page_info->page_offset ^= PAGE_SIZE / 2;
 	addr ^= PAGE_SIZE / 2;
 	data_ring->addr = cpu_to_be64(addr);
 }
 
+static bool gve_rx_can_flip_buffers(struct net_device *netdev)
+{
+#if PAGE_SIZE == 4096
+	/* We can't flip a buffer if we can't fit a packet
+	 * into half a page.
+	 */
+	if (netdev->max_mtu + GVE_RX_PAD + ETH_HLEN  > PAGE_SIZE / 2)
+		return false;
+	return true;
+#else
+	/* PAGE_SIZE != 4096 - don't try to reuse */
+	return false;
+#endif
+}
+
+static int gve_rx_can_recycle_buffer(struct page *page)
+{
+	int pagecount = page_count(page);
+
+	/* This page is not being used by any SKBs - reuse */
+	if (pagecount == 1) {
+		return 1;
+	/* This page is still being used by an SKB - we can't reuse */
+	} else if (pagecount >= 2) {
+		return 0;
+	}
+	WARN(pagecount < 1, "Pagecount should never be < 1");
+	return -1;
+}
+
 static struct sk_buff *
 gve_rx_raw_addressing(struct device *dev, struct net_device *netdev,
 		      struct gve_rx_slot_page_info *page_info, u16 len,
 		      struct napi_struct *napi,
-		      struct gve_rx_data_slot *data_slot)
+		      struct gve_rx_data_slot *data_slot, bool can_flip)
 {
 	struct sk_buff *skb = gve_rx_add_frags(napi, page_info, len);
 
 	if (!skb)
 		return NULL;
 
+	/* Optimistically stop the kernel from freeing the page by increasing
+	 * the page bias. We will check the refcount in refill to determine if
+	 * we need to alloc a new page.
+	 */
+	get_page(page_info->page);
+	page_info->can_flip = can_flip;
+
+	return skb;
+}
+
+static struct sk_buff *
+gve_rx_qpl(struct device *dev, struct net_device *netdev,
+	   struct gve_rx_ring *rx, struct gve_rx_slot_page_info *page_info,
+	   u16 len, struct napi_struct *napi,
+	   struct gve_rx_data_slot *data_slot, bool recycle)
+{
+	struct sk_buff *skb;
+	/* if raw_addressing mode is not enabled gvnic can only receive into
+	 * registered segmens. If the buffer can't be recycled, our only
+	 * choice is to copy the data out of it so that we can return it to the
+	 * device.
+	 */
+	if (recycle) {
+		skb = gve_rx_add_frags(napi, page_info, len);
+		/* No point in recycling if we didn't get the skb */
+		if (skb) {
+			/* Make sure the networking stack can't free the page */
+			get_page(page_info->page);
+			gve_rx_flip_buff(page_info, data_slot);
+		}
+	} else {
+		skb = gve_rx_copy(netdev, napi, page_info, len);
+		if (skb) {
+			u64_stats_update_begin(&rx->statss);
+			rx->rx_copied_pkt++;
+			u64_stats_update_end(&rx->statss);
+		}
+	}
 	return skb;
 }
 
@@ -360,7 +424,6 @@  static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
 	struct gve_rx_data_slot *data_slot;
 	struct sk_buff *skb = NULL;
 	dma_addr_t page_bus;
-	int pagecount;
 	u16 len;
 
 	/* drop this packet */
@@ -381,64 +444,36 @@  static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
 	dma_sync_single_for_cpu(&priv->pdev->dev, page_bus,
 				PAGE_SIZE, DMA_FROM_DEVICE);
 
-	if (PAGE_SIZE == 4096) {
-		if (len <= priv->rx_copybreak) {
-			/* Just copy small packets */
-			skb = gve_rx_copy(rx, dev, napi, page_info, len);
-			u64_stats_update_begin(&rx->statss);
-			rx->rx_copybreak_pkt++;
-			u64_stats_update_end(&rx->statss);
-			goto have_skb;
+	if (len <= priv->rx_copybreak) {
+		/* Just copy small packets */
+		skb = gve_rx_copy(dev, napi, page_info, len);
+		u64_stats_update_begin(&rx->statss);
+		rx->rx_copied_pkt++;
+		rx->rx_copybreak_pkt++;
+		u64_stats_update_end(&rx->statss);
+	} else {
+		bool can_flip = gve_rx_can_flip_buffers(dev);
+		int recycle = 0;
+
+		if (can_flip) {
+			recycle = gve_rx_can_recycle_buffer(page_info->page);
+			if (recycle < 0) {
+				gve_schedule_reset(priv);
+				return false;
+			}
 		}
 		if (rx->data.raw_addressing) {
 			skb = gve_rx_raw_addressing(&priv->pdev->dev, dev,
 						    page_info, len, napi,
-						     data_slot);
-			goto have_skb;
-		}
-		if (unlikely(!gve_can_recycle_pages(dev))) {
-			skb = gve_rx_copy(rx, dev, napi, page_info, len);
-			goto have_skb;
-		}
-		pagecount = page_count(page_info->page);
-		if (pagecount == 1) {
-			/* No part of this page is used by any SKBs; we attach
-			 * the page fragment to a new SKB and pass it up the
-			 * stack.
-			 */
-			skb = gve_rx_add_frags(napi, page_info, len);
-			if (!skb) {
-				u64_stats_update_begin(&rx->statss);
-				rx->rx_skb_alloc_fail++;
-				u64_stats_update_end(&rx->statss);
-				return false;
-			}
-			/* Make sure the kernel stack can't release the page */
-			get_page(page_info->page);
-			/* "flip" to other packet buffer on this page */
-			gve_rx_flip_buff(page_info, &rx->data.data_ring[idx]);
-		} else if (pagecount >= 2) {
-			/* We have previously passed the other half of this
-			 * page up the stack, but it has not yet been freed.
-			 */
-			skb = gve_rx_copy(rx, dev, napi, page_info, len);
+						    data_slot,
+						    can_flip && recycle);
 		} else {
-			WARN(pagecount < 1, "Pagecount should never be < 1");
-			return false;
+			skb = gve_rx_qpl(&priv->pdev->dev, dev, rx,
+					 page_info, len, napi, data_slot,
+					 can_flip && recycle);
 		}
-	} else {
-		if (rx->data.raw_addressing)
-			skb = gve_rx_copy(rx, dev, napi, page_info, len);
-		else
-			skb = gve_rx_raw_addressing(&priv->pdev->dev, dev,
-						    page_info, len, napi,
-						    data_slot);
 	}
 
-have_skb:
-	/* We didn't manage to allocate an skb but we haven't had any
-	 * reset worthy failures.
-	 */
 	if (!skb) {
 		u64_stats_update_begin(&rx->statss);
 		rx->rx_skb_alloc_fail++;
@@ -490,14 +525,46 @@  static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
 
 	while ((fill_cnt & rx->mask) != (rx->cnt & rx->mask)) {
 		u32 idx = fill_cnt & rx->mask;
-		struct gve_rx_slot_page_info *page_info = &rx->data.page_info[idx];
-		struct gve_rx_data_slot *data_slot = &rx->data.data_ring[idx];
-		struct device *dev = &priv->pdev->dev;
+		struct gve_rx_slot_page_info *page_info =
+						&rx->data.page_info[idx];
 
-		gve_rx_free_buffer(dev, page_info, data_slot);
-		page_info->page = NULL;
-		if (gve_rx_alloc_buffer(priv, dev, page_info, data_slot, rx))
-			break;
+		if (page_info->can_flip) {
+			/* The other half of the page is free because it was
+			 * free when we processed the descriptor. Flip to it.
+			 */
+			struct gve_rx_data_slot *data_slot =
+						&rx->data.data_ring[idx];
+
+			gve_rx_flip_buff(page_info, data_slot);
+			page_info->can_flip = false;
+		} else {
+			/* It is possible that the networking stack has already
+			 * finished processing all outstanding packets in the buffer
+			 * and it can be reused.
+			 * Flipping is unnecessary here - if the networking stack still
+			 * owns half the page it is impossible to tell which half. Either
+			 * the whole page is free or it needs to be replaced.
+			 */
+			int recycle = gve_rx_can_recycle_buffer(page_info->page);
+
+			if (recycle < 0) {
+				gve_schedule_reset(priv);
+				return false;
+			}
+			if (!recycle) {
+				/* We can't reuse the buffer - alloc a new one*/
+				struct gve_rx_data_slot *data_slot =
+						&rx->data.data_ring[idx];
+				struct device *dev = &priv->pdev->dev;
+
+				gve_rx_free_buffer(dev, page_info, data_slot);
+				page_info->page = NULL;
+				if (gve_rx_alloc_buffer(priv, dev, page_info,
+							data_slot, rx)) {
+					break;
+				}
+			}
+		}
 		fill_cnt++;
 	}
 	rx->fill_cnt = fill_cnt;

[net-next,v3,3/4] gve: Rx Buffer Recycling

Commit Message

Comments

Patch