Message ID | 1291442121-3302-3-git-send-email-xiaosuo@gmail.com |
---|---|
State | Superseded, archived |
Delegated to: | David Miller |
Headers | show |
Changli Gao wrote: > tq is only used in ri_tasklet, so we move it from ifb_private to a > stack variable of ri_tasklet. > > skb_queue_splice_tail_init() is used instead of the open coded and slow > one. > > Signed-off-by: Changli Gao <xiaosuo@gmail.com> > --- > drivers/net/ifb.c | 49 ++++++++++++------------------------------------- > 1 file changed, 12 insertions(+), 37 deletions(-) > diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c > index d1e362a..cd6e90d 100644 > --- a/drivers/net/ifb.c > +++ b/drivers/net/ifb.c > @@ -39,9 +39,7 @@ > #define TX_Q_LIMIT 32 > struct ifb_private { > struct tasklet_struct ifb_tasklet; > - int tasklet_pending; > struct sk_buff_head rq; > - struct sk_buff_head tq; > }; > > static int numifbs = 2; > @@ -53,27 +51,25 @@ static int ifb_close(struct net_device *dev); > > static void ri_tasklet(unsigned long dev) > { > - > struct net_device *_dev = (struct net_device *)dev; > struct ifb_private *dp = netdev_priv(_dev); > struct net_device_stats *stats = &_dev->stats; > struct netdev_queue *txq; > struct sk_buff *skb; > + struct sk_buff_head tq; > > + __skb_queue_head_init(&tq); > txq = netdev_get_tx_queue(_dev, 0); > - if ((skb = skb_peek(&dp->tq)) == NULL) { > - if (__netif_tx_trylock(txq)) { > - while ((skb = skb_dequeue(&dp->rq)) != NULL) { > - skb_queue_tail(&dp->tq, skb); > - } > - __netif_tx_unlock(txq); > - } else { > - /* reschedule */ > - goto resched; > - } > + if (!__netif_tx_trylock(txq)) { > + tasklet_schedule(&dp->ifb_tasklet); > + return; > } > + skb_queue_splice_tail_init(&dp->rq, &tq); > + if (netif_tx_queue_stopped(txq)) > + netif_tx_wake_queue(txq); > + __netif_tx_unlock(txq); > > - while ((skb = skb_dequeue(&dp->tq)) != NULL) { > + while ((skb = __skb_dequeue(&tq)) != NULL) { > u32 from = G_TC_FROM(skb->tc_verd); > > skb->tc_verd = 0; > @@ -87,7 +83,7 @@ static void ri_tasklet(unsigned long dev) > rcu_read_unlock(); > dev_kfree_skb(skb); > stats->tx_dropped++; > - break; > + continue; IMHO this line is a bugfix and should be a separate patch (for stable). The rest looks OK to me but you could probably skip locking of dp->rq similarly to tq. Jarek P. > } > rcu_read_unlock(); > skb->skb_iif = _dev->ifindex; > @@ -100,23 +96,6 @@ static void ri_tasklet(unsigned long dev) > } else > BUG(); > } > - > - if (__netif_tx_trylock(txq)) { > - if ((skb = skb_peek(&dp->rq)) == NULL) { > - dp->tasklet_pending = 0; > - if (netif_queue_stopped(_dev)) > - netif_wake_queue(_dev); > - } else { > - __netif_tx_unlock(txq); > - goto resched; > - } > - __netif_tx_unlock(txq); > - } else { > -resched: > - dp->tasklet_pending = 1; > - tasklet_schedule(&dp->ifb_tasklet); > - } > - > } > > static const struct net_device_ops ifb_netdev_ops = { > @@ -162,10 +141,8 @@ static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev) > } > > skb_queue_tail(&dp->rq, skb); > - if (!dp->tasklet_pending) { > - dp->tasklet_pending = 1; > + if (skb_queue_len(&dp->rq) == 1) > tasklet_schedule(&dp->ifb_tasklet); > - } > > return NETDEV_TX_OK; > } > @@ -177,7 +154,6 @@ static int ifb_close(struct net_device *dev) > tasklet_kill(&dp->ifb_tasklet); > netif_stop_queue(dev); > skb_queue_purge(&dp->rq); > - skb_queue_purge(&dp->tq); > return 0; > } > > @@ -187,7 +163,6 @@ static int ifb_open(struct net_device *dev) > > tasklet_init(&dp->ifb_tasklet, ri_tasklet, (unsigned long)dev); > skb_queue_head_init(&dp->rq); > - skb_queue_head_init(&dp->tq); > netif_start_queue(dev); > > return 0; -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, Dec 4, 2010 at 9:15 PM, Jarek Poplawski <jarkao2@gmail.com> wrote: > Changli Gao wrote: >> tq is only used in ri_tasklet, so we move it from ifb_private to a >> stack variable of ri_tasklet. >> >> skb_queue_splice_tail_init() is used instead of the open coded and slow >> one. >> >> Signed-off-by: Changli Gao <xiaosuo@gmail.com> >> --- >> drivers/net/ifb.c | 49 ++++++++++++------------------------------------- >> 1 file changed, 12 insertions(+), 37 deletions(-) >> diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c >> index d1e362a..cd6e90d 100644 >> --- a/drivers/net/ifb.c >> +++ b/drivers/net/ifb.c >> @@ -39,9 +39,7 @@ >> #define TX_Q_LIMIT 32 >> struct ifb_private { >> struct tasklet_struct ifb_tasklet; >> - int tasklet_pending; >> struct sk_buff_head rq; >> - struct sk_buff_head tq; >> }; >> >> static int numifbs = 2; >> @@ -53,27 +51,25 @@ static int ifb_close(struct net_device *dev); >> >> static void ri_tasklet(unsigned long dev) >> { >> - >> struct net_device *_dev = (struct net_device *)dev; >> struct ifb_private *dp = netdev_priv(_dev); >> struct net_device_stats *stats = &_dev->stats; >> struct netdev_queue *txq; >> struct sk_buff *skb; >> + struct sk_buff_head tq; >> >> + __skb_queue_head_init(&tq); >> txq = netdev_get_tx_queue(_dev, 0); >> - if ((skb = skb_peek(&dp->tq)) == NULL) { >> - if (__netif_tx_trylock(txq)) { >> - while ((skb = skb_dequeue(&dp->rq)) != NULL) { >> - skb_queue_tail(&dp->tq, skb); >> - } >> - __netif_tx_unlock(txq); >> - } else { >> - /* reschedule */ >> - goto resched; >> - } >> + if (!__netif_tx_trylock(txq)) { >> + tasklet_schedule(&dp->ifb_tasklet); >> + return; >> } >> + skb_queue_splice_tail_init(&dp->rq, &tq); >> + if (netif_tx_queue_stopped(txq)) >> + netif_tx_wake_queue(txq); >> + __netif_tx_unlock(txq); >> >> - while ((skb = skb_dequeue(&dp->tq)) != NULL) { >> + while ((skb = __skb_dequeue(&tq)) != NULL) { >> u32 from = G_TC_FROM(skb->tc_verd); >> >> skb->tc_verd = 0; >> @@ -87,7 +83,7 @@ static void ri_tasklet(unsigned long dev) >> rcu_read_unlock(); >> dev_kfree_skb(skb); >> stats->tx_dropped++; >> - break; >> + continue; > > IMHO this line is a bugfix and should be a separate patch (for stable). It sounds reasonable. > > The rest looks OK to me but you could probably skip locking of dp->rq > similarly to tq. > OK. Thanks.
On Sat, 2010-12-04 at 13:55 +0800, Changli Gao wrote: > tq is only used in ri_tasklet, so we move it from ifb_private to a > stack variable of ri_tasklet. > > skb_queue_splice_tail_init() is used instead of the open coded and slow > one. I like the splice idea but this patch makes me twitch a little. What test setup did you use to check it? cheers, jamal -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, Dec 4, 2010 at 10:18 PM, jamal <hadi@cyberus.ca> wrote: > > I like the splice idea but this patch makes me twitch > a little. What test setup did you use to check it? > Just a simple test: tc qdisc add dev eth0 ingress tc filter add dev eth0 parent ffff: protocol ip basic action mirred egress redirect dev ifb0
On Sat, 2010-12-04 at 14:15 +0100, Jarek Poplawski wrote: > > @@ -87,7 +83,7 @@ static void ri_tasklet(unsigned long dev) > > rcu_read_unlock(); > > dev_kfree_skb(skb); > > stats->tx_dropped++; > > - break; > > + continue; > > IMHO this line is a bugfix and should be a separate patch (for stable). Should be separate, yes. Bug? no. Improvement, maybe ;-> The idea was to defer processing at the first error. Changli is changing it to continue despite the error. The initial goal was to yield whenever possible since we dont maintain a lot of state. cheers, jamal -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, 2010-12-04 at 09:18 -0500, jamal wrote: > I like the splice idea but this patch makes me twitch > a little. What test setup did you use to check it? Ok, here's one thing you changed which is important. We do: -->XXX-->rq-->tq-->XXX--> rq is controlled by queue limit. We only load rq to tq if all of tq is empty. If it is not we dont move things over. Essentially this is a flow control scheme. We dont want many sources to be overwhelming us with packets and every time we grab a txqlen number of packets. For this reason: I would be comfortable if all you did was to add the splice after you skb_peek() - i think that would be a good improvement which is not bound to break anything else. cheers, jamal -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, Dec 4, 2010 at 10:28 PM, jamal <hadi@cyberus.ca> wrote: > On Sat, 2010-12-04 at 14:15 +0100, Jarek Poplawski wrote: > >> > @@ -87,7 +83,7 @@ static void ri_tasklet(unsigned long dev) >> > rcu_read_unlock(); >> > dev_kfree_skb(skb); >> > stats->tx_dropped++; >> > - break; >> > + continue; >> >> IMHO this line is a bugfix and should be a separate patch (for stable). > > Should be separate, yes. > Bug? no. If we breaks the loop when there are still skbs in tq and no skb in rq, the skbs will be left in txq until new skbs are enqueued into rq. In rare cases, no new skb is queued, then these skbs will stay in rq forever. - if (__netif_tx_trylock(txq)) { - if ((skb = skb_peek(&dp->rq)) == NULL) { - dp->tasklet_pending = 0; - if (netif_queue_stopped(_dev)) - netif_wake_queue(_dev); - } else { - __netif_tx_unlock(txq); - goto resched; - } - __netif_tx_unlock(txq); - } else { -resched: - dp->tasklet_pending = 1; - tasklet_schedule(&dp->ifb_tasklet); - } -
On Sat, 2010-12-04 at 09:28 -0500, jamal wrote: > The idea was to defer processing at the first error. Changli > is changing it to continue despite the error. > The initial goal was to yield whenever possible since we dont maintain > a lot of state. Changli: Other points we could defer processing is in case of packets being dropped by dev_queue_xmit and netif_rx cheers, jamal -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, Dec 4, 2010 at 10:42 PM, jamal <hadi@cyberus.ca> wrote: > On Sat, 2010-12-04 at 09:18 -0500, jamal wrote: > >> I like the splice idea but this patch makes me twitch >> a little. What test setup did you use to check it? > > Ok, here's one thing you changed which is important. We do: > > -->XXX-->rq-->tq-->XXX--> > > rq is controlled by queue limit. > We only load rq to tq if all of tq is empty. If it is not > we dont move things over. Essentially this is a flow > control scheme. We dont want many sources to be overwhelming > us with packets and every time we grab a txqlen number of packets. > For this reason: > I would be comfortable if all you did was to add the splice > after you skb_peek() - i think that would be a good improvement > which is not bound to break anything else. > Maybe you misread my patch. tq is a stack variable in ri_tasklet, and initialized all the time. ri_tasklet() won't exits until tq is empty().
On Sat, 2010-12-04 at 22:45 +0800, Changli Gao wrote: > > If we breaks the loop when there are still skbs in tq and no skb in > rq, the skbs will be left in txq until new skbs are enqueued into rq. > In rare cases, no new skb is queued, then these skbs will stay in rq > forever. So should we goto resched? cheers, jamal -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, 2010-12-04 at 22:50 +0800, Changli Gao wrote: > Maybe you misread my patch. tq is a stack variable in ri_tasklet, and > initialized all the time. ri_tasklet() won't exits until tq is > empty(). in your patch is a variable on the stack. What i am saying is you should defer processing when there is an error (note the two other spots i mentioned). This means you may leave dp->tq non-empty and therefore it needs to be saved somewhere as it is before your patch. cheers, jamal -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, Dec 4, 2010 at 10:55 PM, jamal <hadi@cyberus.ca> wrote: > On Sat, 2010-12-04 at 22:45 +0800, Changli Gao wrote: > >> >> If we breaks the loop when there are still skbs in tq and no skb in >> rq, the skbs will be left in txq until new skbs are enqueued into rq. >> In rare cases, no new skb is queued, then these skbs will stay in rq >> forever. > > So should we goto resched? > Only if we can't lock the txq or rq isn't empty, we goto resched. So it is a bug.
On Sat, Dec 4, 2010 at 10:59 PM, jamal <hadi@cyberus.ca> wrote: > On Sat, 2010-12-04 at 22:50 +0800, Changli Gao wrote: > >> Maybe you misread my patch. tq is a stack variable in ri_tasklet, and >> initialized all the time. ri_tasklet() won't exits until tq is >> empty(). > > in your patch is a variable on the stack. > What i am saying is you should defer processing when there is an > error (note the two other spots i mentioned). > This means you may leave dp->tq non-empty and therefore it > needs to be saved somewhere as it is before your patch. > I know what you concern now. Thanks. I'll keep the old behavior in v2.
On Sat, 2010-12-04 at 23:01 +0800, Changli Gao wrote: > On Sat, Dec 4, 2010 at 10:55 PM, jamal <hadi@cyberus.ca> wrote: > > On Sat, 2010-12-04 at 22:45 +0800, Changli Gao wrote: > > > >> > >> If we breaks the loop when there are still skbs in tq and no skb in > >> rq, the skbs will be left in txq until new skbs are enqueued into rq. > >> In rare cases, no new skb is queued, then these skbs will stay in rq > >> forever. > > > > So should we goto resched? > > > > Only if we can't lock the txq or rq isn't empty, we goto resched. So > it is a bug. And to be explicit: Yes, meant to say there is a bug if we break out in the scenario you described above - the fix is to jump to resched. Why do we need the lock? cheers, jamal -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, 2010-12-04 at 23:07 +0800, Changli Gao wrote: > > I know what you concern now. Thanks. I'll keep the old behavior in v2. Ok - thanks Changli. cheers, jamal -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, Dec 04, 2010 at 09:48:47AM -0500, jamal wrote: > On Sat, 2010-12-04 at 09:28 -0500, jamal wrote: > > > The idea was to defer processing at the first error. Changli > > is changing it to continue despite the error. > > The initial goal was to yield whenever possible since we dont maintain > > a lot of state. > > Changli: > Other points we could defer processing is in case of packets > being dropped by dev_queue_xmit and netif_rx Hmm... But we didn't care until now... ;-) Btw. is it really very probable (and worth bothering) that this current error of NULL dev get fixed before we purge this tq queue with deferral one by one? Cheers, Jarek P. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, 2010-12-04 at 16:40 +0100, Jarek Poplawski wrote: > Hmm... But we didn't care until now... ;-) Well, if Changli didnt post - there would be no discussion ;-> The message was lost in the translation somewhere; events such as patches sometimes serve as good reminders. > Btw. is it really very > probable (and worth bothering) that this current error of NULL dev > get fixed before we purge this tq queue with deferral one by one? Indeed - this is working against something buggy. But it has happened often in the past. And the likelihood of there being a few bad ones in the train of packets when this occurs is high. But there are many packets there that wont suffer this sympton - so the only fair scheme is to check all. Note: a BUG() seems unreasonable and the deferring serves as a throttling scheme. What do you have in mind? cheers, jamal -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, Dec 04, 2010 at 11:08:01AM -0500, jamal wrote: > On Sat, 2010-12-04 at 16:40 +0100, Jarek Poplawski wrote: > > > Hmm... But we didn't care until now... ;-) > > Well, if Changli didnt post - there would be no discussion ;-> > The message was lost in the translation somewhere; events such as > patches sometimes serve as good reminders. > > > Btw. is it really very > > probable (and worth bothering) that this current error of NULL dev > > get fixed before we purge this tq queue with deferral one by one? > > Indeed - this is working against something buggy. But it has happened > often in the past. And the likelihood of there being a few bad ones > in the train of packets when this occurs is high. But there are many > packets there that wont suffer this sympton - so the only fair scheme is > to check all. Note: a BUG() seems unreasonable and the deferring serves > as a throttling scheme. > What do you have in mind? I'm simply not convinced this kind of (fast) throttling can properly fix any of the problems (what about other flows in the queue), while Changli's patch makes this tasklet simpler and a bit faster. Cheers, Jarek P. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sun, Dec 5, 2010 at 12:56 AM, Jarek Poplawski <jarkao2@gmail.com> wrote: > > I'm simply not convinced this kind of (fast) throttling can properly > fix any of the problems (what about other flows in the queue), while > Changli's patch makes this tasklet simpler and a bit faster. > The error case handled currently is the original netdev disappears but the corresponding skbs are still in ifb. I do also think checking the return value of netif_rx() and dev_queue_xmit() can fix more 'problems'. :)
On Sun, Dec 5, 2010 at 8:22 AM, Changli Gao <xiaosuo@gmail.com> wrote: > > The error case handled currently is the original netdev disappears but > the corresponding skbs are still in ifb. > > I do also think checking the return value of netif_rx() and > dev_queue_xmit() can fix more 'problems'. :) > I have posted the V2 and split the bug fix to a separate one. BTW: My ultimate goal is making ifb a multi-queue NIC, and the number of queues is equal to the number of the possible CPUs.
On Sun, 2010-12-05 at 08:22 +0800, Changli Gao wrote: > On Sun, Dec 5, 2010 at 12:56 AM, Jarek Poplawski <jarkao2@gmail.com> wrote: > > > > I'm simply not convinced this kind of (fast) throttling can properly > > fix any of the problems (what about other flows in the queue), while > > Changli's patch makes this tasklet simpler and a bit faster. > > > > The error case handled currently is the original netdev disappears but > the corresponding skbs are still in ifb. > > I do also think checking the return value of netif_rx() and > dev_queue_xmit() can fix more 'problems'. :) Please proceed with a patch for that as well.. cheers, jamal PS:- We forgot to thank Jarek for pointing out the bug. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sun, 2010-12-05 at 09:13 +0800, Changli Gao wrote: > BTW: My ultimate goal is making ifb a multi-queue NIC, and the number > of queues is equal to the number of the possible CPUs. My view is this is going to be tricky because: - we use tasklets. When we reschedule we can end up on a differrent cpu. -I dont see any point in having a separate softIRQ - and if you do use other mechanisms it would require a lot more testing since there are quiet a few use cases of ifb cheers, jamal -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sun, Dec 5, 2010 at 10:30 PM, jamal <hadi@cyberus.ca> wrote: > On Sun, 2010-12-05 at 09:13 +0800, Changli Gao wrote: > >> BTW: My ultimate goal is making ifb a multi-queue NIC, and the number >> of queues is equal to the number of the possible CPUs. > > > My view is this is going to be tricky because: > - we use tasklets. When we reschedule we can end up on a differrent > cpu. The tasklets always been scheduled to the current CPU unless it has been schedule already on the other CPU. > -I dont see any point in having a separate softIRQ > - and if you do use other mechanisms it would require a lot more > testing since there are quiet a few use cases of ifb > I keep using tasklet. The attachment is the alpha version.
Le dimanche 05 décembre 2010 à 22:40 +0800, Changli Gao a écrit : > On Sun, Dec 5, 2010 at 10:30 PM, jamal <hadi@cyberus.ca> wrote: > > On Sun, 2010-12-05 at 09:13 +0800, Changli Gao wrote: > > > >> BTW: My ultimate goal is making ifb a multi-queue NIC, and the number > >> of queues is equal to the number of the possible CPUs. > > > > > > My view is this is going to be tricky because: > > - we use tasklets. When we reschedule we can end up on a differrent > > cpu. > > The tasklets always been scheduled to the current CPU unless it has > been schedule already on the other CPU. > > > -I dont see any point in having a separate softIRQ > > - and if you do use other mechanisms it would require a lot more > > testing since there are quiet a few use cases of ifb > > > > I keep using tasklet. The attachment is the alpha version. > for_each_possible_cpu(cpu) { q = per_cpu_ptr(p->q, cpu); ... dev_ifb = alloc_netdev_mq(sizeof(struct ifb_private), "ifb%d", ifb_setup, num_possible_cpus()); This is a very usual error. You can have machines with 2 possibles cpus, numbered 0 and 8 Therere, you must use nr_cpu_ids here instead of num_possible_cpus() -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sun, 2010-12-05 at 22:40 +0800, Changli Gao wrote: > On Sun, Dec 5, 2010 at 10:30 PM, jamal <hadi@cyberus.ca> wrote: > > - we use tasklets. When we reschedule we can end up on a differrent > > cpu. > > The tasklets always been scheduled to the current CPU unless it has > been schedule already on the other CPU. I dont think this can be guaranteed - So the potential of packet reordering exists. > I keep using tasklet. The attachment is the alpha version. >From quick glance I dont see any technicalities - just the reordering concern. cheers, jamal -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sun, Dec 5, 2010 at 11:13 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote: > Le dimanche 05 décembre 2010 à 22:40 +0800, Changli Gao a écrit : >> On Sun, Dec 5, 2010 at 10:30 PM, jamal <hadi@cyberus.ca> wrote: >> > On Sun, 2010-12-05 at 09:13 +0800, Changli Gao wrote: >> > >> >> BTW: My ultimate goal is making ifb a multi-queue NIC, and the number >> >> of queues is equal to the number of the possible CPUs. >> > >> > >> > My view is this is going to be tricky because: >> > - we use tasklets. When we reschedule we can end up on a differrent >> > cpu. >> >> The tasklets always been scheduled to the current CPU unless it has >> been schedule already on the other CPU. >> >> > -I dont see any point in having a separate softIRQ >> > - and if you do use other mechanisms it would require a lot more >> > testing since there are quiet a few use cases of ifb >> > >> >> I keep using tasklet. The attachment is the alpha version. >> > > for_each_possible_cpu(cpu) { > q = per_cpu_ptr(p->q, cpu); > ... > > dev_ifb = alloc_netdev_mq(sizeof(struct ifb_private), "ifb%d", > ifb_setup, num_possible_cpus()); > > This is a very usual error. > > You can have machines with 2 possibles cpus, numbered 0 and 8 > > Therere, you must use nr_cpu_ids here instead of num_possible_cpus() > Thanks.
On Sun, Dec 5, 2010 at 11:13 PM, jamal <hadi@cyberus.ca> wrote: > On Sun, 2010-12-05 at 22:40 +0800, Changli Gao wrote: >> On Sun, Dec 5, 2010 at 10:30 PM, jamal <hadi@cyberus.ca> wrote: > >> > - we use tasklets. When we reschedule we can end up on a differrent >> > cpu. >> >> The tasklets always been scheduled to the current CPU unless it has >> been schedule already on the other CPU. > > I dont think this can be guaranteed - So the potential of packet > reordering exists. > The current implementation can guarantee it. However, I can't find documentation about this 'feature', but I think this behavior should not be changed in future since maybe some call sites relay on it.
On Sun, 2010-12-05 at 23:22 +0800, Changli Gao wrote: > The current implementation can guarantee it. However, I can't find > documentation about this 'feature', but I think this behavior should > not be changed in future since maybe some call sites relay on it. When you think you are in good shape - please add some debug hooks so we can verify this especially under a huge traffic input and I will test it (thats what end of year holidays are for). cheers, jamal -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sun, Dec 05, 2010 at 09:27:11AM -0500, jamal wrote:
...
> PS:- We forgot to thank Jarek for pointing out the bug.
Not at all! I only pointed out that Changli didn't point out ;-)
All credits go to him and you for pointing out the appropriate fix.
Cheers,
Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index d1e362a..cd6e90d 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -39,9 +39,7 @@ #define TX_Q_LIMIT 32 struct ifb_private { struct tasklet_struct ifb_tasklet; - int tasklet_pending; struct sk_buff_head rq; - struct sk_buff_head tq; }; static int numifbs = 2; @@ -53,27 +51,25 @@ static int ifb_close(struct net_device *dev); static void ri_tasklet(unsigned long dev) { - struct net_device *_dev = (struct net_device *)dev; struct ifb_private *dp = netdev_priv(_dev); struct net_device_stats *stats = &_dev->stats; struct netdev_queue *txq; struct sk_buff *skb; + struct sk_buff_head tq; + __skb_queue_head_init(&tq); txq = netdev_get_tx_queue(_dev, 0); - if ((skb = skb_peek(&dp->tq)) == NULL) { - if (__netif_tx_trylock(txq)) { - while ((skb = skb_dequeue(&dp->rq)) != NULL) { - skb_queue_tail(&dp->tq, skb); - } - __netif_tx_unlock(txq); - } else { - /* reschedule */ - goto resched; - } + if (!__netif_tx_trylock(txq)) { + tasklet_schedule(&dp->ifb_tasklet); + return; } + skb_queue_splice_tail_init(&dp->rq, &tq); + if (netif_tx_queue_stopped(txq)) + netif_tx_wake_queue(txq); + __netif_tx_unlock(txq); - while ((skb = skb_dequeue(&dp->tq)) != NULL) { + while ((skb = __skb_dequeue(&tq)) != NULL) { u32 from = G_TC_FROM(skb->tc_verd); skb->tc_verd = 0; @@ -87,7 +83,7 @@ static void ri_tasklet(unsigned long dev) rcu_read_unlock(); dev_kfree_skb(skb); stats->tx_dropped++; - break; + continue; } rcu_read_unlock(); skb->skb_iif = _dev->ifindex; @@ -100,23 +96,6 @@ static void ri_tasklet(unsigned long dev) } else BUG(); } - - if (__netif_tx_trylock(txq)) { - if ((skb = skb_peek(&dp->rq)) == NULL) { - dp->tasklet_pending = 0; - if (netif_queue_stopped(_dev)) - netif_wake_queue(_dev); - } else { - __netif_tx_unlock(txq); - goto resched; - } - __netif_tx_unlock(txq); - } else { -resched: - dp->tasklet_pending = 1; - tasklet_schedule(&dp->ifb_tasklet); - } - } static const struct net_device_ops ifb_netdev_ops = { @@ -162,10 +141,8 @@ static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev) } skb_queue_tail(&dp->rq, skb); - if (!dp->tasklet_pending) { - dp->tasklet_pending = 1; + if (skb_queue_len(&dp->rq) == 1) tasklet_schedule(&dp->ifb_tasklet); - } return NETDEV_TX_OK; } @@ -177,7 +154,6 @@ static int ifb_close(struct net_device *dev) tasklet_kill(&dp->ifb_tasklet); netif_stop_queue(dev); skb_queue_purge(&dp->rq); - skb_queue_purge(&dp->tq); return 0; } @@ -187,7 +163,6 @@ static int ifb_open(struct net_device *dev) tasklet_init(&dp->ifb_tasklet, ri_tasklet, (unsigned long)dev); skb_queue_head_init(&dp->rq); - skb_queue_head_init(&dp->tq); netif_start_queue(dev); return 0;
tq is only used in ri_tasklet, so we move it from ifb_private to a stack variable of ri_tasklet. skb_queue_splice_tail_init() is used instead of the open coded and slow one. Signed-off-by: Changli Gao <xiaosuo@gmail.com> --- drivers/net/ifb.c | 49 ++++++++++++------------------------------------- 1 file changed, 12 insertions(+), 37 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html