Message ID | 1285813497-7384-1-git-send-email-xiaosuo@gmail.com |
---|---|
State | Rejected, archived |
Delegated to: | David Miller |
Headers | show |
On Thu, 2010-09-30 at 10:24 +0800, Changli Gao wrote: > Compare operations are more readable, and compilers generate the same code > for the both. As far as I know, not all supported versions of gcc generate the same code. Also, you could probably now remove the (__force u32) casts. > Use the macros fl4_* to shrink the length of the lines. > > Signed-off-by: Changli Gao <xiaosuo@gmail.com> > --- > net/ipv4/af_inet.c | 7 +++---- > net/ipv4/route.c | 27 ++++++++++++--------------- > 2 files changed, 15 insertions(+), 19 deletions(-) > diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c > index f581f77..ef26640 100644 > --- a/net/ipv4/af_inet.c > +++ b/net/ipv4/af_inet.c > @@ -1338,10 +1338,9 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, > > iph2 = ip_hdr(p); > > - if ((iph->protocol ^ iph2->protocol) | > - (iph->tos ^ iph2->tos) | > - ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | > - ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) { > + if (iph->protocol != iph2->protocol || iph->tos != iph2->tos || > + (__force u32)iph->saddr != (__force u32)iph2->saddr || > + (__force u32)iph->daddr != (__force u32)iph2->daddr) { > NAPI_GRO_CB(p)->same_flow = 0; > continue; > } > diff --git a/net/ipv4/route.c b/net/ipv4/route.c > index 98beda4..6b00fde 100644 > --- a/net/ipv4/route.c > +++ b/net/ipv4/route.c > @@ -683,19 +683,18 @@ static inline bool rt_caching(const struct net *net) > static inline bool compare_hash_inputs(const struct flowi *fl1, > const struct flowi *fl2) > { > - return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | > - ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | > - (fl1->iif ^ fl2->iif)) == 0); > + return (__force u32)fl1->fl4_dst == (__force u32)fl2->fl4_dst && > + (__force u32)fl1->fl4_src == (__force u32)fl2->fl4_src && > + fl1->iif == fl2->iif; > } > > static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) > { > - return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | > - ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | > - (fl1->mark ^ fl2->mark) | > - (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) | > - (fl1->oif ^ fl2->oif) | > - (fl1->iif ^ fl2->iif)) == 0; > + return (__force u32)fl1->fl4_dst == (__force u32)fl2->fl4_dst && > + (__force u32)fl1->fl4_src == (__force u32)fl2->fl4_src && > + fl1->mark == fl2->mark && > + *(u16 *)&fl1->fl4_tos == *(u16 *)&fl2->fl4_tos && > + fl1->oif == fl2->oif && fl1->iif == fl2->iif; > } > > static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) > @@ -2286,12 +2285,10 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, > > for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; > rth = rcu_dereference(rth->dst.rt_next)) { > - if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | > - ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | > - (rth->fl.iif ^ iif) | > - rth->fl.oif | > - (rth->fl.fl4_tos ^ tos)) == 0 && > - rth->fl.mark == skb->mark && > + if ((__force u32)rth->fl.fl4_dst == (__force u32)daddr && > + (__force u32)rth->fl.fl4_src == (__force u32)saddr && > + rth->fl.iif == iif && rth->fl.oif == 0 && > + rth->fl.fl4_tos == tos && rth->fl.mark == skb->mark && > net_eq(dev_net(rth->dst.dev), net) && > !rt_is_expired(rth)) { > if (noref) { -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Sep 30, 2010 at 10:49 AM, Joe Perches <joe@perches.com> wrote: > On Thu, 2010-09-30 at 10:24 +0800, Changli Gao wrote: >> Compare operations are more readable, and compilers generate the same code >> for the both. > > As far as I know, not all supported versions of gcc > generate the same code. Is the former better for the compilers? > > Also, you could probably now remove the (__force u32) casts. > Maybe Eric doesn't think so.
On Thu, 2010-09-30 at 11:07 +0800, Changli Gao wrote: > On Thu, Sep 30, 2010 at 10:49 AM, Joe Perches <joe@perches.com> wrote: > > On Thu, 2010-09-30 at 10:24 +0800, Changli Gao wrote: > >> Compare operations are more readable, and compilers generate the same code > >> for the both. > > As far as I know, not all supported versions of gcc > > generate the same code. > Is the former better for the compilers? Yes. I don't know how much it matters though. > > Also, you could probably now remove the (__force u32) casts. > Maybe Eric doesn't think so. Comparisons of equal types don't need (__force u32) casts. They needed to be cast to u32 for the bitwise or's to avoid compiler warnings. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Le jeudi 30 septembre 2010 à 10:24 +0800, Changli Gao a écrit : > Compare operations are more readable, and compilers generate the same code > for the both. > You have a buggy compiler then. I know this code is ugly, but please keep it as is, dont add conditional branches on hot paths. Thanks > Use the macros fl4_* to shrink the length of the lines. > > Signed-off-by: Changli Gao <xiaosuo@gmail.com> > --- > net/ipv4/af_inet.c | 7 +++---- > net/ipv4/route.c | 27 ++++++++++++--------------- > 2 files changed, 15 insertions(+), 19 deletions(-) > diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c > index f581f77..ef26640 100644 > --- a/net/ipv4/af_inet.c > +++ b/net/ipv4/af_inet.c > @@ -1338,10 +1338,9 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, > > iph2 = ip_hdr(p); > > - if ((iph->protocol ^ iph2->protocol) | > - (iph->tos ^ iph2->tos) | > - ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | > - ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) { > + if (iph->protocol != iph2->protocol || iph->tos != iph2->tos || > + (__force u32)iph->saddr != (__force u32)iph2->saddr || > + (__force u32)iph->daddr != (__force u32)iph2->daddr) { > NAPI_GRO_CB(p)->same_flow = 0; > continue; > } > diff --git a/net/ipv4/route.c b/net/ipv4/route.c > index 98beda4..6b00fde 100644 > --- a/net/ipv4/route.c > +++ b/net/ipv4/route.c > @@ -683,19 +683,18 @@ static inline bool rt_caching(const struct net *net) > static inline bool compare_hash_inputs(const struct flowi *fl1, > const struct flowi *fl2) > { > - return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | > - ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | > - (fl1->iif ^ fl2->iif)) == 0); > + return (__force u32)fl1->fl4_dst == (__force u32)fl2->fl4_dst && > + (__force u32)fl1->fl4_src == (__force u32)fl2->fl4_src && > + fl1->iif == fl2->iif; > } > > static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) > { > - return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | > - ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | > - (fl1->mark ^ fl2->mark) | > - (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) | > - (fl1->oif ^ fl2->oif) | > - (fl1->iif ^ fl2->iif)) == 0; > + return (__force u32)fl1->fl4_dst == (__force u32)fl2->fl4_dst && > + (__force u32)fl1->fl4_src == (__force u32)fl2->fl4_src && > + fl1->mark == fl2->mark && > + *(u16 *)&fl1->fl4_tos == *(u16 *)&fl2->fl4_tos && > + fl1->oif == fl2->oif && fl1->iif == fl2->iif; > } > > static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) > @@ -2286,12 +2285,10 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, > > for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; > rth = rcu_dereference(rth->dst.rt_next)) { > - if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | > - ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | > - (rth->fl.iif ^ iif) | > - rth->fl.oif | > - (rth->fl.fl4_tos ^ tos)) == 0 && > - rth->fl.mark == skb->mark && > + if ((__force u32)rth->fl.fl4_dst == (__force u32)daddr && > + (__force u32)rth->fl.fl4_src == (__force u32)saddr && > + rth->fl.iif == iif && rth->fl.oif == 0 && > + rth->fl.fl4_tos == tos && rth->fl.mark == skb->mark && > net_eq(dev_net(rth->dst.dev), net) && > !rt_is_expired(rth)) { > if (noref) { > -- > To unsubscribe from this list: send the line "unsubscribe netdev" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Sep 30, 2010 at 1:16 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote: > Le jeudi 30 septembre 2010 à 10:24 +0800, Changli Gao a écrit : >> Compare operations are more readable, and compilers generate the same code >> for the both. >> > > You have a buggy compiler then. gcc version 4.4.3 (Gentoo 4.4.3-r2 p1.2) rth = rcu_dereference(rth->dst.rt_next)) { if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | (rth->fl.iif ^ iif) | 2f12: 44 3b 80 dc 00 00 00 cmp 0xdc(%rax),%r8d 2f19: 0f 85 a2 00 00 00 jne 2fc1 <ip_route_input_common+0x145 > rth->fl.oif | 2f1f: 83 b8 d8 00 00 00 00 cmpl $0x0,0xd8(%rax) 2f26: 0f 85 95 00 00 00 jne 2fc1 <ip_route_input_common+0x145 > tos &= IPTOS_RT_MASK; hash = rt_hash(daddr, saddr, iif, rt_genid(net)); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; rth = rcu_dereference(rth->dst.rt_next)) { if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | 2f2c: 44 3b b8 e4 00 00 00 cmp 0xe4(%rax),%r15d 2f33: 0f 85 88 00 00 00 jne 2fc1 <ip_route_input_common+0x145> ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | 2f39: 44 3b b0 e8 00 00 00 cmp 0xe8(%rax),%r14d 2f40: 75 7f jne 2fc1 <ip_route_input_common+0x145> > > I know this code is ugly, but please keep it as is, dont add conditional > branches on hot paths. > If the compiler doesn't generate conditional branches, we have to touch every necessary field of all the cache entries in one hash bucket. Is it better than condition branch? I think the compiler developers know it better. And the compiler reorders the conditional branches, is it expected?
Le jeudi 30 septembre 2010 à 14:09 +0800, Changli Gao a écrit : > On Thu, Sep 30, 2010 at 1:16 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote: > > Le jeudi 30 septembre 2010 à 10:24 +0800, Changli Gao a écrit : > >> Compare operations are more readable, and compilers generate the same code > >> for the both. > >> > > > > You have a buggy compiler then. > > gcc version 4.4.3 (Gentoo 4.4.3-r2 p1.2) > > rth = rcu_dereference(rth->dst.rt_next)) { > if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | > ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | > (rth->fl.iif ^ iif) | > 2f12: 44 3b 80 dc 00 00 00 cmp 0xdc(%rax),%r8d > 2f19: 0f 85 a2 00 00 00 jne 2fc1 <ip_route_input_common+0x145 > > > rth->fl.oif | > 2f1f: 83 b8 d8 00 00 00 00 cmpl $0x0,0xd8(%rax) > 2f26: 0f 85 95 00 00 00 jne 2fc1 <ip_route_input_common+0x145 > > > tos &= IPTOS_RT_MASK; > hash = rt_hash(daddr, saddr, iif, rt_genid(net)); > > for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; > rth = rcu_dereference(rth->dst.rt_next)) { > if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | > 2f2c: 44 3b b8 e4 00 00 00 cmp 0xe4(%rax),%r15d > 2f33: 0f 85 88 00 00 00 jne 2fc1 > <ip_route_input_common+0x145> > ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | > 2f39: 44 3b b0 e8 00 00 00 cmp 0xe8(%rax),%r14d > 2f40: 75 7f jne 2fc1 > <ip_route_input_common+0x145> > > > > > > I know this code is ugly, but please keep it as is, dont add conditional > > branches on hot paths. > > > > If the compiler doesn't generate conditional branches, we have to > touch every necessary field of all the cache entries in one hash > bucket. Is it better than condition branch? I think the compiler > developers know it better. Last famous words. Are you aware of cache lines (64 bytes at least on typical cpus), and that all fields are already in CPU L1 cache ? I (and others) worked hard in the past. > > And the compiler reorders the conditional branches, is it expected? > Your compiler added conditional branches on a code not wanting them, only because on _your_ cpu, these conditional branches might be cheap. Now, try to compile for an i686 target and see the difference. If there was no difference, your compiler would be _buggy_, because not generating optimal assembly. Here I get : c141dda9: 8b 55 e8 mov -0x18(%ebp),%edx c141ddac: 8b 81 9c 00 00 00 mov 0x9c(%ecx),%eax c141ddb2: 33 91 a0 00 00 00 xor 0xa0(%ecx),%edx c141ddb8: 31 f0 xor %esi,%eax c141ddba: 09 d0 or %edx,%eax c141ddbc: 8b 55 e0 mov -0x20(%ebp),%edx c141ddbf: 33 91 94 00 00 00 xor 0x94(%ecx),%edx c141ddc5: 09 d0 or %edx,%eax c141ddc7: 0f b6 55 e7 movzbl -0x19(%ebp),%edx c141ddcb: 0b 81 90 00 00 00 or 0x90(%ecx),%eax c141ddd1: 32 91 a4 00 00 00 xor 0xa4(%ecx),%dl c141ddd7: 0f b6 d2 movzbl %dl,%edx c141ddda: 09 d0 or %edx,%eax c141dddc: 0f 85 9d 00 00 00 jne c141de7f <ip_route_input_common+0x1b4> As you can see, only one conditional branch. Your patch is not welcomed, thanks. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f581f77..ef26640 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1338,10 +1338,9 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, iph2 = ip_hdr(p); - if ((iph->protocol ^ iph2->protocol) | - (iph->tos ^ iph2->tos) | - ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | - ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) { + if (iph->protocol != iph2->protocol || iph->tos != iph2->tos || + (__force u32)iph->saddr != (__force u32)iph2->saddr || + (__force u32)iph->daddr != (__force u32)iph2->daddr) { NAPI_GRO_CB(p)->same_flow = 0; continue; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 98beda4..6b00fde 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -683,19 +683,18 @@ static inline bool rt_caching(const struct net *net) static inline bool compare_hash_inputs(const struct flowi *fl1, const struct flowi *fl2) { - return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | - ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | - (fl1->iif ^ fl2->iif)) == 0); + return (__force u32)fl1->fl4_dst == (__force u32)fl2->fl4_dst && + (__force u32)fl1->fl4_src == (__force u32)fl2->fl4_src && + fl1->iif == fl2->iif; } static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) { - return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | - ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | - (fl1->mark ^ fl2->mark) | - (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) | - (fl1->oif ^ fl2->oif) | - (fl1->iif ^ fl2->iif)) == 0; + return (__force u32)fl1->fl4_dst == (__force u32)fl2->fl4_dst && + (__force u32)fl1->fl4_src == (__force u32)fl2->fl4_src && + fl1->mark == fl2->mark && + *(u16 *)&fl1->fl4_tos == *(u16 *)&fl2->fl4_tos && + fl1->oif == fl2->oif && fl1->iif == fl2->iif; } static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) @@ -2286,12 +2285,10 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; rth = rcu_dereference(rth->dst.rt_next)) { - if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | - ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | - (rth->fl.iif ^ iif) | - rth->fl.oif | - (rth->fl.fl4_tos ^ tos)) == 0 && - rth->fl.mark == skb->mark && + if ((__force u32)rth->fl.fl4_dst == (__force u32)daddr && + (__force u32)rth->fl.fl4_src == (__force u32)saddr && + rth->fl.iif == iif && rth->fl.oif == 0 && + rth->fl.fl4_tos == tos && rth->fl.mark == skb->mark && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { if (noref) {
Compare operations are more readable, and compilers generate the same code for the both. Use the macros fl4_* to shrink the length of the lines. Signed-off-by: Changli Gao <xiaosuo@gmail.com> --- net/ipv4/af_inet.c | 7 +++---- net/ipv4/route.c | 27 ++++++++++++--------------- 2 files changed, 15 insertions(+), 19 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html