diff mbox

[2/3] caif: Add support for flow-control on device's tx-queue

Message ID 1322834777-2486-3-git-send-email-sjur.brandeland@stericsson.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

sjur.brandeland@stericsson.com Dec. 2, 2011, 2:06 p.m. UTC
Flow control is implemented by inspecting the qdisc queue length
in order to detect potential overflow on the TX queue. When a threshold
is reached flow-off is sent upwards in the CAIF stack. At the same time
the skb->destructor is hi-jacked in order to detect when the last packet
put on queue is consumed. When this "hi-jacked" packet is consumed, flow-on
is sent upwards in the CAIF stack.

Signed-off-by: Sjur Brændeland <sjur.brandeland@stericsson.com>
---
 net/caif/caif_dev.c |   48 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 48 insertions(+), 0 deletions(-)

Comments

Eric Dumazet Dec. 2, 2011, 2:38 p.m. UTC | #1
Le vendredi 02 décembre 2011 à 15:06 +0100, Sjur Brændeland a écrit :
> Flow control is implemented by inspecting the qdisc queue length
> in order to detect potential overflow on the TX queue. When a threshold
> is reached flow-off is sent upwards in the CAIF stack. At the same time
> the skb->destructor is hi-jacked in order to detect when the last packet
> put on queue is consumed. When this "hi-jacked" packet is consumed, flow-on
> is sent upwards in the CAIF stack.
> 
> Signed-off-by: Sjur Brændeland <sjur.brandeland@stericsson.com>
> ---
>  net/caif/caif_dev.c |   48 ++++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 48 insertions(+), 0 deletions(-)
> 
> diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
> index f7e8c70..415353e 100644
> --- a/net/caif/caif_dev.c
> +++ b/net/caif/caif_dev.c
> @@ -34,6 +34,7 @@ struct caif_device_entry {
>  	struct list_head list;
>  	struct net_device *netdev;
>  	int __percpu *pcpu_refcnt;
> +	bool xoff;
>  };
>  
>  struct caif_device_entry_list {
> @@ -48,6 +49,7 @@ struct caif_net {
>  };
>  
>  static int caif_net_id;
> +static int q_high = 50; /* Percent */
>  
>  struct cfcnfg *get_cfcnfg(struct net *net)
>  {
> @@ -126,9 +128,28 @@ static struct caif_device_entry *caif_get(struct net_device *dev)
>  	return NULL;
>  }
>  
> +void caif_flow_cb(struct sk_buff *skb)
> +{
> +	struct caif_device_entry *caifd;
> +	WARN_ON(skb->dev == NULL);
> +
> +	rcu_read_lock();
> +	caifd = caif_get(skb->dev);
> +	caifd->xoff = 0;
> +	caifd_hold(caifd);
> +	rcu_read_unlock();
> +
> +	caifd->layer.up->
> +		ctrlcmd(caifd->layer.up,
> +			_CAIF_CTRLCMD_PHYIF_FLOW_ON_IND,
> +			caifd->layer.id);
> +	caifd_put(caifd);
> +}
> +
>  static int transmit(struct cflayer *layer, struct cfpkt *pkt)
>  {
>  	int err;
> +	struct caif_dev_common *caifdev;
>  	struct caif_device_entry *caifd =
>  	    container_of(layer, struct caif_device_entry, layer);
>  	struct sk_buff *skb;
> @@ -137,6 +158,33 @@ static int transmit(struct cflayer *layer, struct cfpkt *pkt)
>  	skb->dev = caifd->netdev;
>  	skb_reset_network_header(skb);
>  	skb->protocol = htons(ETH_P_CAIF);
> +	caifdev = netdev_priv(caifd->netdev);
> +
> +	if (caifdev->flowctrl == NULL && caifd->netdev->tx_queue_len > 0 &&
> +			!caifd->xoff) {
> +		struct netdev_queue *txq;
> +		int high;
> +
> +		txq = netdev_get_tx_queue(skb->dev, 0);

Why queue 0 and not another one ?

> +		high = (caifd->netdev->tx_queue_len * q_high) / 100;
> +
> +		/* If we run with a TX queue, check if the queue is too long*/

Are you sure only this cpu can run here ? any lock is held ?

> +		if (netif_queue_stopped(caifd->netdev) ||
> +			qdisc_qlen(txq->qdisc) > high) {
> +
> +			pr_debug("queue stop(%d) or full (%d>%d) - XOFF\n",
> +					netif_queue_stopped(caifd->netdev),
> +					qdisc_qlen(txq->qdisc), high);
> +			caifd->xoff = 1;
> +			/* Hijack this skb free callback function. */
> +			skb_orphan(skb);
> +			skb->destructor = caif_flow_cb;
> +			caifd->layer.up->
> +				ctrlcmd(caifd->layer.up,
> +					_CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND,
> +					caifd->layer.id);
> +		}
> +	}
>  
>  	err = dev_queue_xmit(skb);
>  	if (err > 0)

What prevents dev_queue_xmit() to early orphan skb ?



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
sjur.brandeland@stericsson.com Dec. 3, 2011, 5:35 p.m. UTC | #2
Hi Eric,

> >  static int transmit(struct cflayer *layer, struct cfpkt *pkt)
> >  {
> >  	int err;
> > +	struct caif_dev_common *caifdev;
> >  	struct caif_device_entry *caifd =
> >  	    container_of(layer, struct caif_device_entry, layer);
> >  	struct sk_buff *skb;
> > @@ -137,6 +158,33 @@ static int transmit(struct cflayer *layer,
> struct cfpkt *pkt)
> >  	skb->dev = caifd->netdev;
> >  	skb_reset_network_header(skb);
> >  	skb->protocol = htons(ETH_P_CAIF);
> > +	caifdev = netdev_priv(caifd->netdev);
> > +
> > +	if (caifdev->flowctrl == NULL && caifd->netdev->tx_queue_len > 0
> &&
> > +			!caifd->xoff) {
> > +		struct netdev_queue *txq;
> > +		int high;
> > +
> > +		txq = netdev_get_tx_queue(skb->dev, 0);
>
> Why queue 0 and not another one ?

The purpose of flow-control here is to try to avoid losing
"control" traffic such as AT-commands sent to the modem.

So far we have (to my knowledge) never configured
multiple queues for any CAIF interface. So for current
setup just queue 0 should work just fine.

But in future it might make sense to configure more than one queue:
One queue for control and others for IP traffic or debug.
I think the sensible setup then would be to have control traffic
on queue-0 with flow control, but just ignore overflow on other queues
carrying IP and Debug traffic. (For proper IP queue management
this is done in the modem anyway - the radio-link is normally
slower than the CAIF link, so queues would normally build
in the modem, not at the CAIF link interface).

In any case flow control on queue-0 would do.


> > +		high = (caifd->netdev->tx_queue_len * q_high) / 100;
> > +
> > +		/* If we run with a TX queue, check if the queue is too
> long*/
>
> Are you sure only this cpu can run here ? any lock is held ?

I guess there are multiple issues here.
1) Access to tx_queue: I see in net/core/dev.c access to qdisc is RCU
	protected. I believe I should add RCU locking here.

2) I don't believe I need to hold spin_lock for the detection of overflow.
	If we have races here the worst thing that can
	happen is that Flow-off is a bit off in timing.

3) I think I'll add a spin_lock when accessing caifd->xoff to avoid
	multiple flow-off indications.

I'll make a new patch fixing these issues.

> > +		if (netif_queue_stopped(caifd->netdev) ||
> > +			qdisc_qlen(txq->qdisc) > high) {
> > +
> > +			pr_debug("queue stop(%d) or full (%d>%d) - XOFF\n",
> > +					netif_queue_stopped(caifd->netdev),
> > +					qdisc_qlen(txq->qdisc), high);
> > +			caifd->xoff = 1;
> > +			/* Hijack this skb free callback function. */
> > +			skb_orphan(skb);
> > +			skb->destructor = caif_flow_cb;
> > +			caifd->layer.up->
> > +				ctrlcmd(caifd->layer.up,
> > +					_CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND,
> > +					caifd->layer.id);
> > +		}
> > +	}
> >
> >  	err = dev_queue_xmit(skb);
> >  	if (err > 0)
>
> What prevents dev_queue_xmit() to early orphan skb ?

My understanding is that skb destructor primarily is used for socket
memory accounting. In this case we're fooling the socket accounting,
but it should only happen when hitting the flow-off queue length
threshold. If we have a tx queue on 1000 it would happen at most
every 500 packet, in reality much more seldom.

However I think I can, if I do locking properly, stash away the destructor
and call it when the flow callback is called... what do you think?


Thank you for reviewing this Eric, further feedback welcome.

Regards,
Sjur
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index f7e8c70..415353e 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -34,6 +34,7 @@  struct caif_device_entry {
 	struct list_head list;
 	struct net_device *netdev;
 	int __percpu *pcpu_refcnt;
+	bool xoff;
 };
 
 struct caif_device_entry_list {
@@ -48,6 +49,7 @@  struct caif_net {
 };
 
 static int caif_net_id;
+static int q_high = 50; /* Percent */
 
 struct cfcnfg *get_cfcnfg(struct net *net)
 {
@@ -126,9 +128,28 @@  static struct caif_device_entry *caif_get(struct net_device *dev)
 	return NULL;
 }
 
+void caif_flow_cb(struct sk_buff *skb)
+{
+	struct caif_device_entry *caifd;
+	WARN_ON(skb->dev == NULL);
+
+	rcu_read_lock();
+	caifd = caif_get(skb->dev);
+	caifd->xoff = 0;
+	caifd_hold(caifd);
+	rcu_read_unlock();
+
+	caifd->layer.up->
+		ctrlcmd(caifd->layer.up,
+			_CAIF_CTRLCMD_PHYIF_FLOW_ON_IND,
+			caifd->layer.id);
+	caifd_put(caifd);
+}
+
 static int transmit(struct cflayer *layer, struct cfpkt *pkt)
 {
 	int err;
+	struct caif_dev_common *caifdev;
 	struct caif_device_entry *caifd =
 	    container_of(layer, struct caif_device_entry, layer);
 	struct sk_buff *skb;
@@ -137,6 +158,33 @@  static int transmit(struct cflayer *layer, struct cfpkt *pkt)
 	skb->dev = caifd->netdev;
 	skb_reset_network_header(skb);
 	skb->protocol = htons(ETH_P_CAIF);
+	caifdev = netdev_priv(caifd->netdev);
+
+	if (caifdev->flowctrl == NULL && caifd->netdev->tx_queue_len > 0 &&
+			!caifd->xoff) {
+		struct netdev_queue *txq;
+		int high;
+
+		txq = netdev_get_tx_queue(skb->dev, 0);
+		high = (caifd->netdev->tx_queue_len * q_high) / 100;
+
+		/* If we run with a TX queue, check if the queue is too long*/
+		if (netif_queue_stopped(caifd->netdev) ||
+			qdisc_qlen(txq->qdisc) > high) {
+
+			pr_debug("queue stop(%d) or full (%d>%d) - XOFF\n",
+					netif_queue_stopped(caifd->netdev),
+					qdisc_qlen(txq->qdisc), high);
+			caifd->xoff = 1;
+			/* Hijack this skb free callback function. */
+			skb_orphan(skb);
+			skb->destructor = caif_flow_cb;
+			caifd->layer.up->
+				ctrlcmd(caifd->layer.up,
+					_CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND,
+					caifd->layer.id);
+		}
+	}
 
 	err = dev_queue_xmit(skb);
 	if (err > 0)