diff mbox

[12/12] cxgb4: NUMA-aware Tx queue allocations

Message ID 1292357896-14339-13-git-send-email-dm@chelsio.com
State Superseded, archived
Delegated to: David Miller
Headers show

Commit Message

Dimitris Michailidis Dec. 14, 2010, 8:18 p.m. UTC
Allocate Tx queue memory on the node indicated by the new
netdev_queue_numa_node_read.  If that fails we allocate on any node.

Signed-off-by: Dimitris Michailidis <dm@chelsio.com>
---
 drivers/net/cxgb4/sge.c |   20 +++++++++++++-------
 1 files changed, 13 insertions(+), 7 deletions(-)

Comments

Eric Dumazet Dec. 14, 2010, 9:17 p.m. UTC | #1
Le mardi 14 décembre 2010 à 12:18 -0800, Dimitris Michailidis a écrit :
> Allocate Tx queue memory on the node indicated by the new
> netdev_queue_numa_node_read.  If that fails we allocate on any node.
> 
> Signed-off-by: Dimitris Michailidis <dm@chelsio.com>
> ---
>  drivers/net/cxgb4/sge.c |   20 +++++++++++++-------
>  1 files changed, 13 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/cxgb4/sge.c b/drivers/net/cxgb4/sge.c
> index cc0b997..ed98b8a 100644
> --- a/drivers/net/cxgb4/sge.c
> +++ b/drivers/net/cxgb4/sge.c
> @@ -579,6 +579,7 @@ static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
>   *	@phys: the physical address of the allocated ring
>   *	@metadata: address of the array holding the SW state for the ring
>   *	@stat_size: extra space in HW ring for status information
> + *	@node: preferred node for memory allocations
>   *
>   *	Allocates resources for an SGE descriptor ring, such as Tx queues,
>   *	free buffer lists, or response queues.  Each SGE ring requires
> @@ -590,7 +591,7 @@ static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
>   */
>  static void *alloc_ring(struct device *dev, size_t nelem, size_t elem_size,
>  			size_t sw_size, dma_addr_t *phys, void *metadata,
> -			size_t stat_size)
> +			size_t stat_size, int node)
>  {
>  	size_t len = nelem * elem_size + stat_size;
>  	void *s = NULL;
> @@ -599,7 +600,10 @@ static void *alloc_ring(struct device *dev, size_t nelem, size_t elem_size,
>  	if (!p)
>  		return NULL;
>  	if (sw_size) {
> -		s = kcalloc(nelem, sw_size, GFP_KERNEL);
> +		if (node >= 0)
> +			s = kzalloc_node(nelem * sw_size, GFP_KERNEL, node);

kzalloc_node() has a fallback, so you don't need to retry with kcalloc().

> +		if (!s)
> +			s = kcalloc(nelem, sw_size, GFP_KERNEL);
>  
>  		if (!s) {
>  			dma_free_coherent(dev, len, p, *phys);

Also, I am not sure this is going to work, since we can set up XPS only
after the device has been set up?

By the time your driver allocates the rings, we will probably read
-1/NUMA_NO_NODE.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dimitris Michailidis Dec. 14, 2010, 10:51 p.m. UTC | #2
Eric Dumazet wrote:
> Le mardi 14 décembre 2010 à 12:18 -0800, Dimitris Michailidis a écrit :
>> Allocate Tx queue memory on the node indicated by the new
>> netdev_queue_numa_node_read.  If that fails we allocate on any node.
>>
>> Signed-off-by: Dimitris Michailidis <dm@chelsio.com>
>> ---
>>  drivers/net/cxgb4/sge.c |   20 +++++++++++++-------
>>  1 files changed, 13 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/net/cxgb4/sge.c b/drivers/net/cxgb4/sge.c
>> index cc0b997..ed98b8a 100644
>> --- a/drivers/net/cxgb4/sge.c
>> +++ b/drivers/net/cxgb4/sge.c
>> @@ -579,6 +579,7 @@ static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
>>   *	@phys: the physical address of the allocated ring
>>   *	@metadata: address of the array holding the SW state for the ring
>>   *	@stat_size: extra space in HW ring for status information
>> + *	@node: preferred node for memory allocations
>>   *
>>   *	Allocates resources for an SGE descriptor ring, such as Tx queues,
>>   *	free buffer lists, or response queues.  Each SGE ring requires
>> @@ -590,7 +591,7 @@ static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
>>   */
>>  static void *alloc_ring(struct device *dev, size_t nelem, size_t elem_size,
>>  			size_t sw_size, dma_addr_t *phys, void *metadata,
>> -			size_t stat_size)
>> +			size_t stat_size, int node)
>>  {
>>  	size_t len = nelem * elem_size + stat_size;
>>  	void *s = NULL;
>> @@ -599,7 +600,10 @@ static void *alloc_ring(struct device *dev, size_t nelem, size_t elem_size,
>>  	if (!p)
>>  		return NULL;
>>  	if (sw_size) {
>> -		s = kcalloc(nelem, sw_size, GFP_KERNEL);
>> +		if (node >= 0)
>> +			s = kzalloc_node(nelem * sw_size, GFP_KERNEL, node);
> 
> kzalloc_node() has a fallback, so you don't need to retry with kcalloc().

I took this retry part from ixgbe but I can remove it if it's not needed. 
Luckily it's the last patch in the series.

> 
>> +		if (!s)
>> +			s = kcalloc(nelem, sw_size, GFP_KERNEL);
>>  
>>  		if (!s) {
>>  			dma_free_coherent(dev, len, p, *phys);
> 
> Also, I am not sure this is going to work, since we can set up XPS only
> after the device has been set up?
> 
> By the time your driver allocates rings, we probably read
> -1/NUMA_NO_NODE

XPS is available after registration.  The queues are allocated at open time;
if one configures XPS prior to that, the allocations happen on the right
nodes.  I've tried this and this is the behavior I see.  It is true that
setting XPS after open doesn't affect the queue allocations.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/cxgb4/sge.c b/drivers/net/cxgb4/sge.c
index cc0b997..ed98b8a 100644
--- a/drivers/net/cxgb4/sge.c
+++ b/drivers/net/cxgb4/sge.c
@@ -579,6 +579,7 @@  static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
  *	@phys: the physical address of the allocated ring
  *	@metadata: address of the array holding the SW state for the ring
  *	@stat_size: extra space in HW ring for status information
+ *	@node: preferred node for memory allocations
  *
  *	Allocates resources for an SGE descriptor ring, such as Tx queues,
  *	free buffer lists, or response queues.  Each SGE ring requires
@@ -590,7 +591,7 @@  static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
  */
 static void *alloc_ring(struct device *dev, size_t nelem, size_t elem_size,
 			size_t sw_size, dma_addr_t *phys, void *metadata,
-			size_t stat_size)
+			size_t stat_size, int node)
 {
 	size_t len = nelem * elem_size + stat_size;
 	void *s = NULL;
@@ -599,7 +600,10 @@  static void *alloc_ring(struct device *dev, size_t nelem, size_t elem_size,
 	if (!p)
 		return NULL;
 	if (sw_size) {
-		s = kcalloc(nelem, sw_size, GFP_KERNEL);
+		if (node >= 0)
+			s = kzalloc_node(nelem * sw_size, GFP_KERNEL, node);
+		if (!s)
+			s = kcalloc(nelem, sw_size, GFP_KERNEL);
 
 		if (!s) {
 			dma_free_coherent(dev, len, p, *phys);
@@ -1982,7 +1986,7 @@  int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 	iq->size = roundup(iq->size, 16);
 
 	iq->desc = alloc_ring(adap->pdev_dev, iq->size, iq->iqe_len, 0,
-			      &iq->phys_addr, NULL, 0);
+			      &iq->phys_addr, NULL, 0, NUMA_NO_NODE);
 	if (!iq->desc)
 		return -ENOMEM;
 
@@ -2008,7 +2012,7 @@  int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 		fl->size = roundup(fl->size, 8);
 		fl->desc = alloc_ring(adap->pdev_dev, fl->size, sizeof(__be64),
 				      sizeof(struct rx_sw_desc), &fl->addr,
-				      &fl->sdesc, STAT_LEN);
+				      &fl->sdesc, STAT_LEN, NUMA_NO_NODE);
 		if (!fl->desc)
 			goto fl_nomem;
 
@@ -2095,7 +2099,8 @@  int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
 
 	txq->q.desc = alloc_ring(adap->pdev_dev, txq->q.size,
 			sizeof(struct tx_desc), sizeof(struct tx_sw_desc),
-			&txq->q.phys_addr, &txq->q.sdesc, STAT_LEN);
+			&txq->q.phys_addr, &txq->q.sdesc, STAT_LEN,
+			netdev_queue_numa_node_read(netdevq));
 	if (!txq->q.desc)
 		return -ENOMEM;
 
@@ -2147,7 +2152,7 @@  int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
 
 	txq->q.desc = alloc_ring(adap->pdev_dev, nentries,
 				 sizeof(struct tx_desc), 0, &txq->q.phys_addr,
-				 NULL, 0);
+				 NULL, 0, NUMA_NO_NODE);
 	if (!txq->q.desc)
 		return -ENOMEM;
 
@@ -2198,7 +2203,8 @@  int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq,
 
 	txq->q.desc = alloc_ring(adap->pdev_dev, txq->q.size,
 			sizeof(struct tx_desc), sizeof(struct tx_sw_desc),
-			&txq->q.phys_addr, &txq->q.sdesc, STAT_LEN);
+			&txq->q.phys_addr, &txq->q.sdesc, STAT_LEN,
+			NUMA_NO_NODE);
 	if (!txq->q.desc)
 		return -ENOMEM;