diff mbox

[ovs-dev,v3] ipv6 tunneling: Fix for performance drop introduced by ipv6 tunnel support.

Message ID 1453224238-48955-1-git-send-email-sugesh.chandran@intel.com
State Superseded
Headers show

Commit Message

Chandran, Sugesh Jan. 19, 2016, 5:23 p.m. UTC
Adding a new field called dl_type in flow tunnel structure to verify the validity
of tunnel metadata. This field avoids the need of resetting and validating the
entire ipv4/ipv6 tunnel destination address which caused a serious performance
drop.

Fixes: 3ae91c019019 ("tunneling: add IPv6 support to netdev_tunnel_config")
Signed-off-by: Sugesh Chandran <sugesh.chandran@intel.com>
---
 lib/flow.c                   |  7 ++---
 lib/match.c                  | 12 ++++----
 lib/meta-flow.c              | 10 +++++--
 lib/netdev-vport.c           | 10 +++----
 lib/odp-util.c               | 10 +++----
 lib/packets.c                |  7 ++++-
 lib/packets.h                | 69 ++++++++++++++++++++++++++++++++------------
 ofproto/ofproto-dpif-ipfix.c |  3 +-
 ofproto/ofproto-dpif-rid.c   |  3 +-
 ofproto/ofproto-dpif-sflow.c |  6 ++--
 ofproto/tunnel.c             | 12 ++++----
 11 files changed, 96 insertions(+), 53 deletions(-)

Comments

Thadeu Lima de Souza Cascardo Jan. 21, 2016, 4:33 p.m. UTC | #1
On Tue, Jan 19, 2016 at 05:23:58PM +0000, Sugesh Chandran wrote:
> Adding a new field called dl_type in flow tunnel structure to verify the validity
> of tunnel metadata. This field avoids the need of resetting and validating the
> entire ipv4/ipv6 tunnel destination address which caused a serious performance
> drop.
> 
> Fixes: 3ae91c019019 ("tunneling: add IPv6 support to netdev_tunnel_config")
> Signed-off-by: Sugesh Chandran <sugesh.chandran@intel.com>

Acked-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
Daniele Di Proietto Jan. 29, 2016, 2 a.m. UTC | #2
Hi Sugesh,

Sorry for the delay, I've been trying to test different approaches.

I tested the patch and I see that the performance is up again. I'm not
sure whether introducing the 'dl_type' flag is going to be very easy to
maintain, since we have to remember to update when we touch the structure.
Unfortunately that structure is used everywhere in OVS, not just in the
fast path.

I have prepared a workaround to speed up the metadata initialization here:

http://openvswitch.org/pipermail/dev/2016-January/065263.html

In my setup it restores the throughput to the same numbers. It complicates
the code slightly, but the modifications are local to dpif-netdev.

What do you think?

Thanks,

Daniele

On 19/01/2016 09:23, "Sugesh Chandran" <sugesh.chandran@intel.com> wrote:

>Adding a new field called dl_type in flow tunnel structure to verify the
>validity
>of tunnel metadata. This field avoids the need of resetting and
>validating the
>entire ipv4/ipv6 tunnel destination address which caused a serious
>performance
>drop.
>
>Fixes: 3ae91c019019 ("tunneling: add IPv6 support to
>netdev_tunnel_config")
>Signed-off-by: Sugesh Chandran <sugesh.chandran@intel.com>
>---
> lib/flow.c                   |  7 ++---
> lib/match.c                  | 12 ++++----
> lib/meta-flow.c              | 10 +++++--
> lib/netdev-vport.c           | 10 +++----
> lib/odp-util.c               | 10 +++----
> lib/packets.c                |  7 ++++-
> lib/packets.h                | 69
>++++++++++++++++++++++++++++++++------------
> ofproto/ofproto-dpif-ipfix.c |  3 +-
> ofproto/ofproto-dpif-rid.c   |  3 +-
> ofproto/ofproto-dpif-sflow.c |  6 ++--
> ofproto/tunnel.c             | 12 ++++----
> 11 files changed, 96 insertions(+), 53 deletions(-)
>
>diff --git a/lib/flow.c b/lib/flow.c
>index 5668d0c..33aadd1 100644
>--- a/lib/flow.c
>+++ b/lib/flow.c
>@@ -818,15 +818,14 @@ flow_get_metadata(const struct flow *flow, struct
>match *flow_metadata)
>     if (flow->tunnel.ip_src) {
>         match_set_tun_src(flow_metadata, flow->tunnel.ip_src);
>     }
>-    if (flow->tunnel.ip_dst) {
>+    if (flow->tunnel.dl_type == htons(ETH_TYPE_IP)) {
>         match_set_tun_dst(flow_metadata, flow->tunnel.ip_dst);
>+    } else if (flow->tunnel.dl_type == htons(ETH_TYPE_IPV6)) {
>+        match_set_tun_ipv6_dst(flow_metadata, &flow->tunnel.ipv6_dst);
>     }
>     if (ipv6_addr_is_set(&flow->tunnel.ipv6_src)) {
>         match_set_tun_ipv6_src(flow_metadata, &flow->tunnel.ipv6_src);
>     }
>-    if (ipv6_addr_is_set(&flow->tunnel.ipv6_dst)) {
>-        match_set_tun_ipv6_dst(flow_metadata, &flow->tunnel.ipv6_dst);
>-    }
>     if (flow->tunnel.gbp_id != htons(0)) {
>         match_set_tun_gbp_id(flow_metadata, flow->tunnel.gbp_id);
>     }
>diff --git a/lib/match.c b/lib/match.c
>index 95d34bc..97a623e 100644
>--- a/lib/match.c
>+++ b/lib/match.c
>@@ -186,8 +186,8 @@ match_set_tun_dst(struct match *match, ovs_be32 dst)
> void
> match_set_tun_dst_masked(struct match *match, ovs_be32 dst, ovs_be32
>mask)
> {
>-    match->wc.masks.tunnel.ip_dst = mask;
>-    match->flow.tunnel.ip_dst = dst & mask;
>+    set_ipv4_dst_tnl(&match->wc.masks.tunnel, mask);
>+    set_ipv4_dst_tnl(&match->flow.tunnel, dst & mask);
> }
> 
> void
>@@ -208,16 +208,16 @@ match_set_tun_ipv6_src_masked(struct match *match,
>const struct in6_addr *src,
> void
> match_set_tun_ipv6_dst(struct match *match, const struct in6_addr *dst)
> {
>-    match->flow.tunnel.ipv6_dst = *dst;
>-    match->wc.masks.tunnel.ipv6_dst = in6addr_exact;
>+    set_ipv6_dst_tnl(&match->flow.tunnel, *dst);
>+    set_ipv6_dst_tnl(&match->wc.masks.tunnel, in6addr_exact);
> }
> 
> void
> match_set_tun_ipv6_dst_masked(struct match *match, const struct in6_addr
>*dst,
>                               const struct in6_addr *mask)
> {
>-    match->flow.tunnel.ipv6_dst = ipv6_addr_bitand(dst, mask);
>-    match->wc.masks.tunnel.ipv6_dst = *mask;
>+    set_ipv6_dst_tnl(&match->flow.tunnel, ipv6_addr_bitand(dst, mask));
>+    set_ipv6_dst_tnl(&match->wc.masks.tunnel, *mask);
> }
> 
> void
>diff --git a/lib/meta-flow.c b/lib/meta-flow.c
>index 6bd0b99..9d939be 100644
>--- a/lib/meta-flow.c
>+++ b/lib/meta-flow.c
>@@ -1180,13 +1180,13 @@ mf_set_flow_value(const struct mf_field *mf,
>         flow->tunnel.ip_src = value->be32;
>         break;
>     case MFF_TUN_DST:
>-        flow->tunnel.ip_dst = value->be32;
>+        set_ipv4_dst_tnl(&flow->tunnel, value->be32);
>         break;
>     case MFF_TUN_IPV6_SRC:
>         flow->tunnel.ipv6_src = value->ipv6;
>         break;
>     case MFF_TUN_IPV6_DST:
>-        flow->tunnel.ipv6_dst = value->ipv6;
>+        set_ipv6_dst_tnl(&flow->tunnel, value->ipv6);
>         break;
>     case MFF_TUN_FLAGS:
>         flow->tunnel.flags = (flow->tunnel.flags & ~FLOW_TNL_PUB_F_MASK)
>|
>@@ -1503,6 +1503,12 @@ mf_set_wild(const struct mf_field *mf, struct
>match *match, char **err_str)
>                sizeof match->wc.masks.tunnel.ipv6_dst);
>         memset(&match->flow.tunnel.ipv6_dst, 0,
>                sizeof match->flow.tunnel.ipv6_dst);
>+        /* What if flow have a valid ipv4 tunnel data??
>+         * Reset the dl_type only if thats not the case.
>+         */
>+        match->wc.masks.tunnel.dl_type = (match->wc.masks.tunnel.dl_type
>==
>+                                          htons(ETH_TYPE_IP)) ?
>+                                          match->wc.masks.tunnel.dl_type
>: 0;
>         break;
>     case MFF_TUN_FLAGS:
>         match_set_tun_flags_masked(match, 0, 0);
>diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
>index 88f5022..94ddf59 100644
>--- a/lib/netdev-vport.c
>+++ b/lib/netdev-vport.c
>@@ -877,7 +877,7 @@ ip_extract_tnl_md(struct dp_packet *packet, struct
>flow_tnl *tnl,
> {
>     void *nh;
>     struct ip_header *ip;
>-    struct ovs_16aligned_ip6_hdr *ip6;
>+    struct ip6_hdr *ip6;
>     void *l4;
>     int l3_size;
> 
>@@ -919,19 +919,19 @@ ip_extract_tnl_md(struct dp_packet *packet, struct
>flow_tnl *tnl,
>         ip_dst = get_16aligned_be32(&ip->ip_dst);
> 
>         tnl->ip_src = ip_src;
>-        tnl->ip_dst = ip_dst;
>         tnl->ip_tos = ip->ip_tos;
>         tnl->ip_ttl = ip->ip_ttl;
>+        set_ipv4_dst_tnl(tnl, ip_dst);
> 
>         *hlen += IP_HEADER_LEN;
> 
>     } else if (IP_VER(ip->ip_ihl_ver) == 6) {
> 
>-        memcpy(tnl->ipv6_src.s6_addr, ip6->ip6_src.be16, sizeof
>ip6->ip6_src);
>-        memcpy(tnl->ipv6_dst.s6_addr, ip6->ip6_dst.be16, sizeof
>ip6->ip6_dst);
>+        memcpy(tnl->ipv6_src.s6_addr, ip6->ip6_src.s6_addr,
>+               sizeof ip6->ip6_src);
>         tnl->ip_tos = 0;
>         tnl->ip_ttl = ip6->ip6_hlim;
>-
>+        set_ipv6_dst_tnl(tnl, *(struct in6_addr *)&ip6->ip6_dst);
>         *hlen += IPV6_HEADER_LEN;
> 
>     } else {
>diff --git a/lib/odp-util.c b/lib/odp-util.c
>index f16e113..309bb9c 100644
>--- a/lib/odp-util.c
>+++ b/lib/odp-util.c
>@@ -510,7 +510,7 @@ format_odp_tnl_push_header(struct ds *ds, struct
>ovs_action_push_tnl *data)
>                       gnh->oam ? "oam," : "",
>                       gnh->critical ? "crit," : "",
>                       ntohl(get_16aligned_be32(&gnh->vni)) >> 8);
>- 
>+
>         if (gnh->opt_len) {
>             ds_put_cstr(ds, ",options(");
>             format_geneve_opts(gnh->options, NULL, gnh->opt_len * 4,
>@@ -1864,13 +1864,13 @@ odp_tun_key_from_attr__(const struct nlattr *attr,
>             tun->ip_src = nl_attr_get_be32(a);
>             break;
>         case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
>-            tun->ip_dst = nl_attr_get_be32(a);
>+            set_ipv4_dst_tnl(tun, nl_attr_get_be32(a));
>             break;
>         case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
>             tun->ipv6_src = nl_attr_get_in6_addr(a);
>             break;
>         case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
>-            tun->ipv6_dst = nl_attr_get_in6_addr(a);
>+            set_ipv6_dst_tnl(tun, nl_attr_get_in6_addr(a));
>             break;
>         case OVS_TUNNEL_KEY_ATTR_TOS:
>             tun->ip_tos = nl_attr_get_u8(a);
>@@ -1961,13 +1961,13 @@ tun_key_to_attr(struct ofpbuf *a, const struct
>flow_tnl *tun_key,
>     if (tun_key->ip_src) {
>         nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
>tun_key->ip_src);
>     }
>-    if (tun_key->ip_dst) {
>+    if (tun_key->dl_type == htons(ETH_TYPE_IP)) {
>         nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
>tun_key->ip_dst);
>     }
>     if (ipv6_addr_is_set(&tun_key->ipv6_src)) {
>         nl_msg_put_in6_addr(a, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
>&tun_key->ipv6_src);
>     }
>-    if (ipv6_addr_is_set(&tun_key->ipv6_dst)) {
>+    if (tun_key->dl_type == htons(ETH_TYPE_IPV6)) {
>         nl_msg_put_in6_addr(a, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
>&tun_key->ipv6_dst);
>     }
>     if (tun_key->ip_tos) {
>diff --git a/lib/packets.c b/lib/packets.c
>index d82341d..4f00c0e 100644
>--- a/lib/packets.c
>+++ b/lib/packets.c
>@@ -39,7 +39,12 @@ const struct in6_addr in6addr_all_hosts =
>IN6ADDR_ALL_HOSTS_INIT;
> struct in6_addr
> flow_tnl_dst(const struct flow_tnl *tnl)
> {
>-    return tnl->ip_dst ? in6_addr_mapped_ipv4(tnl->ip_dst) :
>tnl->ipv6_dst;
>+    if (tnl->dl_type == htons(ETH_TYPE_IP)) {
>+        return in6_addr_mapped_ipv4(tnl->ip_dst);
>+    } else if (tnl->dl_type == htons(ETH_TYPE_IPV6)) {
>+        return tnl->ipv6_dst;
>+    }
>+    return in6addr_any;
> }
> 
> struct in6_addr
>diff --git a/lib/packets.h b/lib/packets.h
>index 834e8a4..7e8dd91 100644
>--- a/lib/packets.h
>+++ b/lib/packets.h
>@@ -35,8 +35,26 @@
> struct dp_packet;
> struct ds;
> 
>+#define ETH_TYPE_IP            0x0800
>+#define ETH_TYPE_ARP           0x0806
>+#define ETH_TYPE_TEB           0x6558
>+#define ETH_TYPE_VLAN_8021Q    0x8100
>+#define ETH_TYPE_VLAN          ETH_TYPE_VLAN_8021Q
>+#define ETH_TYPE_VLAN_8021AD   0x88a8
>+#define ETH_TYPE_IPV6          0x86dd
>+#define ETH_TYPE_LACP          0x8809
>+#define ETH_TYPE_RARP          0x8035
>+#define ETH_TYPE_MPLS          0x8847
>+#define ETH_TYPE_MPLS_MCAST    0x8848
>+
> /* Tunnel information used in flow key and metadata. */
> struct flow_tnl {
>+    /* The tunnel destination ip/ipv6 address are not initializing to
>zero
>+     * due to the performance constrains. the ethernet type field
>'dl_type' is
>+     * used to validate the destination addresses. 'dl_type' is set to
>zero
>+     * for a invalid flow_tnl entry.
>+     */
>+    ovs_be16 dl_type;
>     ovs_be32 ip_dst;
>     struct in6_addr ipv6_dst;
>     ovs_be32 ip_src;
>@@ -49,7 +67,7 @@ struct flow_tnl {
>     ovs_be16 tp_dst;
>     ovs_be16 gbp_id;
>     uint8_t  gbp_flags;
>-    uint8_t  pad1[5];        /* Pad to 64 bits. */
>+    uint8_t  pad1[1];        /* Pad to 64 bits. */
>     struct tun_metadata metadata;
> };
> 
>@@ -76,10 +94,36 @@ struct flow_tnl {
> 
> static inline bool ipv6_addr_is_set(const struct in6_addr *addr);
> 
>+static inline void
>+set_ipv4_dst_tnl(struct flow_tnl *tnl, ovs_be32 ip_dst)
>+{
>+    if (tnl) {
>+        tnl->ip_dst = ip_dst;
>+        if (tnl->ip_dst) {
>+            tnl->dl_type = htons(ETH_TYPE_IP);
>+        } else {
>+            tnl->dl_type = 0;
>+        }
>+    }
>+}
>+
>+static inline void
>+set_ipv6_dst_tnl(struct flow_tnl *tnl, struct in6_addr ipv6_dst)
>+{
>+    if (tnl) {
>+        tnl->ipv6_dst = ipv6_dst;
>+        if (ipv6_addr_is_set(&tnl->ipv6_dst)) {
>+            tnl->dl_type = htons(ETH_TYPE_IPV6);
>+        } else {
>+            tnl->dl_type = 0;
>+        }
>+    }
>+}
>+
> static inline bool
> flow_tnl_dst_is_set(const struct flow_tnl *tnl)
> {
>-    return tnl->ip_dst || ipv6_addr_is_set(&tnl->ipv6_dst);
>+    return (tnl->dl_type != 0);
> }
> 
> struct in6_addr flow_tnl_dst(const struct flow_tnl *tnl);
>@@ -90,8 +134,8 @@ static inline size_t
> flow_tnl_size(const struct flow_tnl *src)
> {
>     if (!flow_tnl_dst_is_set(src)) {
>-        /* Covers ip_dst and ipv6_dst only. */
>-        return offsetof(struct flow_tnl, ip_src);
>+        /* Covers dl_type field only. */
>+        return offsetof(struct flow_tnl, ip_dst);
>     }
>     if (src->flags & FLOW_TNL_F_UDPIF) {
>         /* Datapath format, cover all options we have. */
>@@ -154,11 +198,10 @@ static inline void
> pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
> {
>     /* It can be expensive to zero out all of the tunnel metadata.
>However,
>-     * we can just zero out ip_dst and the rest of the data will never be
>+     * we can just zero out dl_type and the rest of the data will never
>be
>      * looked at. */
>     memset(md, 0, offsetof(struct pkt_metadata, in_port));
>-    md->tunnel.ip_dst = 0;
>-    md->tunnel.ipv6_dst = in6addr_any;
>+    md->tunnel.dl_type = 0;
> 
>     md->in_port.odp_port = port;
> }
>@@ -347,18 +390,6 @@ ovs_be32 set_mpls_lse_values(uint8_t ttl, uint8_t
>tc, uint8_t bos,
> #define ETH_ADDR_SCAN_ARGS(EA) \
>     &(EA).ea[0], &(EA).ea[1], &(EA).ea[2], &(EA).ea[3], &(EA).ea[4],
>&(EA).ea[5]
> 
>-#define ETH_TYPE_IP            0x0800
>-#define ETH_TYPE_ARP           0x0806
>-#define ETH_TYPE_TEB           0x6558
>-#define ETH_TYPE_VLAN_8021Q    0x8100
>-#define ETH_TYPE_VLAN          ETH_TYPE_VLAN_8021Q
>-#define ETH_TYPE_VLAN_8021AD   0x88a8
>-#define ETH_TYPE_IPV6          0x86dd
>-#define ETH_TYPE_LACP          0x8809
>-#define ETH_TYPE_RARP          0x8035
>-#define ETH_TYPE_MPLS          0x8847
>-#define ETH_TYPE_MPLS_MCAST    0x8848
>-
> static inline bool eth_type_mpls(ovs_be16 eth_type)
> {
>     return eth_type == htons(ETH_TYPE_MPLS) ||
>diff --git a/ofproto/ofproto-dpif-ipfix.c b/ofproto/ofproto-dpif-ipfix.c
>index a610c53..ca06583 100644
>--- a/ofproto/ofproto-dpif-ipfix.c
>+++ b/ofproto/ofproto-dpif-ipfix.c
>@@ -1721,7 +1721,8 @@ dpif_ipfix_bridge_sample(struct dpif_ipfix *di,
>const struct dp_packet *packet,
>      * of matched packets. */
>     packet_delta_count = UINT32_MAX / di->bridge_exporter.probability;
>     if (di->bridge_exporter.options->enable_tunnel_sampling) {
>-        if (output_odp_port == ODPP_NONE && flow->tunnel.ip_dst) {
>+        if (output_odp_port == ODPP_NONE &&
>+                flow->tunnel.dl_type == htons(ETH_TYPE_IP)) {
>             /* Input tunnel. */
>             tunnel_key = &flow->tunnel;
>             tunnel_port = dpif_ipfix_find_port(di, input_odp_port);
>diff --git a/ofproto/ofproto-dpif-rid.c b/ofproto/ofproto-dpif-rid.c
>index d142933..735b5f3 100644
>--- a/ofproto/ofproto-dpif-rid.c
>+++ b/ofproto/ofproto-dpif-rid.c
>@@ -297,8 +297,7 @@ uint32_t
> recirc_alloc_id(struct ofproto_dpif *ofproto)
> {
>     struct flow_tnl tunnel;
>-    tunnel.ip_dst = htonl(0);
>-    tunnel.ipv6_dst = in6addr_any;
>+    tunnel.dl_type = 0;
>     struct recirc_state state = {
>         .table_id = TBL_INTERNAL,
>         .ofproto = ofproto,
>diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
>index f11699c..ea21a1a 100644
>--- a/ofproto/ofproto-dpif-sflow.c
>+++ b/ofproto/ofproto-dpif-sflow.c
>@@ -899,7 +899,7 @@ sflow_read_tnl_push_action(const struct nlattr *attr,
>     /* IPv4 */
>     /* Cannot assume alignment so just use memcpy. */
>     sflow_actions->tunnel.ip_src = get_16aligned_be32(&ip->ip_src);
>-    sflow_actions->tunnel.ip_dst = get_16aligned_be32(&ip->ip_dst);
>+    set_ipv4_dst_tnl(&sflow_actions->tunnel,
>get_16aligned_be32(&ip->ip_dst));
>     sflow_actions->tunnel.ip_tos = ip->ip_tos;
>     sflow_actions->tunnel.ip_ttl = ip->ip_ttl;
>     /* The tnl_push action can supply the ip_protocol too. */
>@@ -991,7 +991,7 @@ sflow_read_set_action(const struct nlattr *attr,
>                 sflow_actions->tunnel.ip_src = key->ipv4_src;
>             }
>             if (key->ipv4_dst) {
>-                sflow_actions->tunnel.ip_dst = key->ipv4_dst;
>+                set_ipv4_dst_tnl(&sflow_actions->tunnel, key->ipv4_dst);
>             }
>             if (key->ipv4_proto) {
>                 sflow_actions->tunnel_ipproto = key->ipv4_proto;
>@@ -1287,7 +1287,7 @@ dpif_sflow_received(struct dpif_sflow *ds, const
>struct dp_packet *packet,
>     fs.output = cookie->sflow.output;
> 
>     /* Input tunnel. */
>-    if (flow->tunnel.ip_dst) {
>+    if (flow->tunnel.dl_type == htons(ETH_TYPE_IP)) {
> 	memset(&tnlInElem, 0, sizeof(tnlInElem));
> 	tnlInElem.tag = SFLFLOW_EX_IPV4_TUNNEL_INGRESS;
> 	tnlInProto = dpif_sflow_tunnel_proto(in_dsp->tunnel_type);
>diff --git a/ofproto/tunnel.c b/ofproto/tunnel.c
>index 5cf6c75..b2bd254 100644
>--- a/ofproto/tunnel.c
>+++ b/ofproto/tunnel.c
>@@ -362,12 +362,12 @@ tnl_wc_init(struct flow *flow, struct
>flow_wildcards *wc)
> {
>     if (tnl_port_should_receive(flow)) {
>         wc->masks.tunnel.tun_id = OVS_BE64_MAX;
>-        if (flow->tunnel.ip_dst) {
>+        if (flow->tunnel.dl_type == htons(ETH_TYPE_IP)) {
>             wc->masks.tunnel.ip_src = OVS_BE32_MAX;
>-            wc->masks.tunnel.ip_dst = OVS_BE32_MAX;
>-        } else {
>+            set_ipv4_dst_tnl(&wc->masks.tunnel, OVS_BE32_MAX);
>+        } else if (flow->tunnel.dl_type == htons(ETH_TYPE_IPV6)) {
>             wc->masks.tunnel.ipv6_src = in6addr_exact;
>-            wc->masks.tunnel.ipv6_dst = in6addr_exact;
>+            set_ipv6_dst_tnl(&wc->masks.tunnel, in6addr_exact);
>         }
>         wc->masks.tunnel.flags = (FLOW_TNL_F_DONT_FRAGMENT |
>                                   FLOW_TNL_F_CSUM |
>@@ -424,7 +424,9 @@ tnl_port_send(const struct ofport_dpif *ofport,
>struct flow *flow,
>     if (!cfg->ip_dst_flow) {
>         flow->tunnel.ip_dst =
>in6_addr_get_mapped_ipv4(&tnl_port->match.ipv6_dst);
>         if (!flow->tunnel.ip_dst) {
>-            flow->tunnel.ipv6_dst = tnl_port->match.ipv6_dst;
>+            set_ipv6_dst_tnl(&flow->tunnel, tnl_port->match.ipv6_dst);
>+        } else {
>+            set_ipv4_dst_tnl(&flow->tunnel, flow->tunnel.ip_dst);
>         }
>     }
>     flow->pkt_mark = tnl_port->match.pkt_mark;
>-- 
>1.9.1
>
>_______________________________________________
>dev mailing list
>dev@openvswitch.org
>http://openvswitch.org/mailman/listinfo/dev
Chandran, Sugesh Feb. 1, 2016, 8:30 p.m. UTC | #3
Hi Daniele,

Thank you for sending out the patch.
The proposal looks fine for me and also I verified that the performance is restored with the patch.

One suggestion is , Do we really have to pass two parameters to the "dp_netdev_input" to init and validate the metadata.?
Can we use  the pointer to the port_id/dp_netdev_port. This way there wont be a dependency between arguments to a function.
What do you think?? Does it make more complicated??



Regards
_Sugesh


> -----Original Message-----
> From: Daniele Di Proietto [mailto:diproiettod@vmware.com]
> Sent: Friday, January 29, 2016 2:01 AM
> To: Chandran, Sugesh <sugesh.chandran@intel.com>; jbenc@redhat.com;
> cascardo@redhat.com
> Cc: dev@openvswitch.org
> Subject: Re: [ovs-dev] [PATCH v3] ipv6 tunneling: Fix for performance drop
> introduced by ipv6 tunnel support.
> 
> Hi Sugesh,
> 
> Sorry for the delay, I've been trying to test different approaches.
> 
> I tested the patch and I see that the performance is up again. I'm not sure
> whether introducing the 'dl_type' flag is going to be very easy to maintain,
> since we have to remember to update when we touch the structure.
> Unfortunately that structure is used everywhere in OVS, not just in the fast
> path.
> 
> I have prepared a workaround to speed up the metadata initialization here:
> 
> http://openvswitch.org/pipermail/dev/2016-January/065263.html
> 
> In my setup it restores the throughput to the same numbers. It complicates
> the code slightly, but the modifications are local to dpif-netdev.
> 
> What do you think?
> 
> Thanks,
> 
> Daniele
> 
> On 19/01/2016 09:23, "Sugesh Chandran" <sugesh.chandran@intel.com>
> wrote:
> 
> >Adding a new field called dl_type in flow tunnel structure to verify
> >the validity of tunnel metadata. This field avoids the need of
> >resetting and validating the entire ipv4/ipv6 tunnel destination
> >address which caused a serious performance drop.
> >
> >Fixes: 3ae91c019019 ("tunneling: add IPv6 support to
> >netdev_tunnel_config")
> >Signed-off-by: Sugesh Chandran <sugesh.chandran@intel.com>
> >---
> > lib/flow.c                   |  7 ++---
> > lib/match.c                  | 12 ++++----
> > lib/meta-flow.c              | 10 +++++--
> > lib/netdev-vport.c           | 10 +++----
> > lib/odp-util.c               | 10 +++----
> > lib/packets.c                |  7 ++++-
> > lib/packets.h                | 69
> >++++++++++++++++++++++++++++++++------------
> > ofproto/ofproto-dpif-ipfix.c |  3 +-
> > ofproto/ofproto-dpif-rid.c   |  3 +-
> > ofproto/ofproto-dpif-sflow.c |  6 ++--
> > ofproto/tunnel.c             | 12 ++++----
> > 11 files changed, 96 insertions(+), 53 deletions(-)
> >
> >diff --git a/lib/flow.c b/lib/flow.c
> >index 5668d0c..33aadd1 100644
> >--- a/lib/flow.c
> >+++ b/lib/flow.c
> >@@ -818,15 +818,14 @@ flow_get_metadata(const struct flow *flow,
> struct
> >match *flow_metadata)
> >     if (flow->tunnel.ip_src) {
> >         match_set_tun_src(flow_metadata, flow->tunnel.ip_src);
> >     }
> >-    if (flow->tunnel.ip_dst) {
> >+    if (flow->tunnel.dl_type == htons(ETH_TYPE_IP)) {
> >         match_set_tun_dst(flow_metadata, flow->tunnel.ip_dst);
> >+    } else if (flow->tunnel.dl_type == htons(ETH_TYPE_IPV6)) {
> >+        match_set_tun_ipv6_dst(flow_metadata, &flow->tunnel.ipv6_dst);
> >     }
> >     if (ipv6_addr_is_set(&flow->tunnel.ipv6_src)) {
> >         match_set_tun_ipv6_src(flow_metadata, &flow->tunnel.ipv6_src);
> >     }
> >-    if (ipv6_addr_is_set(&flow->tunnel.ipv6_dst)) {
> >-        match_set_tun_ipv6_dst(flow_metadata, &flow->tunnel.ipv6_dst);
> >-    }
> >     if (flow->tunnel.gbp_id != htons(0)) {
> >         match_set_tun_gbp_id(flow_metadata, flow->tunnel.gbp_id);
> >     }
> >diff --git a/lib/match.c b/lib/match.c
> >index 95d34bc..97a623e 100644
> >--- a/lib/match.c
> >+++ b/lib/match.c
> >@@ -186,8 +186,8 @@ match_set_tun_dst(struct match *match, ovs_be32
> >dst)  void  match_set_tun_dst_masked(struct match *match, ovs_be32 dst,
> >ovs_be32
> >mask)
> > {
> >-    match->wc.masks.tunnel.ip_dst = mask;
> >-    match->flow.tunnel.ip_dst = dst & mask;
> >+    set_ipv4_dst_tnl(&match->wc.masks.tunnel, mask);
> >+    set_ipv4_dst_tnl(&match->flow.tunnel, dst & mask);
> > }
> >
> > void
> >@@ -208,16 +208,16 @@ match_set_tun_ipv6_src_masked(struct match
> >*match, const struct in6_addr *src,  void
> >match_set_tun_ipv6_dst(struct match *match, const struct in6_addr *dst)
> >{
> >-    match->flow.tunnel.ipv6_dst = *dst;
> >-    match->wc.masks.tunnel.ipv6_dst = in6addr_exact;
> >+    set_ipv6_dst_tnl(&match->flow.tunnel, *dst);
> >+    set_ipv6_dst_tnl(&match->wc.masks.tunnel, in6addr_exact);
> > }
> >
> > void
> > match_set_tun_ipv6_dst_masked(struct match *match, const struct
> >in6_addr *dst,
> >                               const struct in6_addr *mask)  {
> >-    match->flow.tunnel.ipv6_dst = ipv6_addr_bitand(dst, mask);
> >-    match->wc.masks.tunnel.ipv6_dst = *mask;
> >+    set_ipv6_dst_tnl(&match->flow.tunnel, ipv6_addr_bitand(dst, mask));
> >+    set_ipv6_dst_tnl(&match->wc.masks.tunnel, *mask);
> > }
> >
> > void
> >diff --git a/lib/meta-flow.c b/lib/meta-flow.c index 6bd0b99..9d939be
> >100644
> >--- a/lib/meta-flow.c
> >+++ b/lib/meta-flow.c
> >@@ -1180,13 +1180,13 @@ mf_set_flow_value(const struct mf_field *mf,
> >         flow->tunnel.ip_src = value->be32;
> >         break;
> >     case MFF_TUN_DST:
> >-        flow->tunnel.ip_dst = value->be32;
> >+        set_ipv4_dst_tnl(&flow->tunnel, value->be32);
> >         break;
> >     case MFF_TUN_IPV6_SRC:
> >         flow->tunnel.ipv6_src = value->ipv6;
> >         break;
> >     case MFF_TUN_IPV6_DST:
> >-        flow->tunnel.ipv6_dst = value->ipv6;
> >+        set_ipv6_dst_tnl(&flow->tunnel, value->ipv6);
> >         break;
> >     case MFF_TUN_FLAGS:
> >         flow->tunnel.flags = (flow->tunnel.flags &
> > ~FLOW_TNL_PUB_F_MASK)
> >|
> >@@ -1503,6 +1503,12 @@ mf_set_wild(const struct mf_field *mf, struct
> >match *match, char **err_str)
> >                sizeof match->wc.masks.tunnel.ipv6_dst);
> >         memset(&match->flow.tunnel.ipv6_dst, 0,
> >                sizeof match->flow.tunnel.ipv6_dst);
> >+        /* What if flow have a valid ipv4 tunnel data??
> >+         * Reset the dl_type only if thats not the case.
> >+         */
> >+        match->wc.masks.tunnel.dl_type =
> >+ (match->wc.masks.tunnel.dl_type
> >==
> >+                                          htons(ETH_TYPE_IP)) ?
> >+
> >+ match->wc.masks.tunnel.dl_type
> >: 0;
> >         break;
> >     case MFF_TUN_FLAGS:
> >         match_set_tun_flags_masked(match, 0, 0); diff --git
> >a/lib/netdev-vport.c b/lib/netdev-vport.c index 88f5022..94ddf59 100644
> >--- a/lib/netdev-vport.c
> >+++ b/lib/netdev-vport.c
> >@@ -877,7 +877,7 @@ ip_extract_tnl_md(struct dp_packet *packet, struct
> >flow_tnl *tnl,  {
> >     void *nh;
> >     struct ip_header *ip;
> >-    struct ovs_16aligned_ip6_hdr *ip6;
> >+    struct ip6_hdr *ip6;
> >     void *l4;
> >     int l3_size;
> >
> >@@ -919,19 +919,19 @@ ip_extract_tnl_md(struct dp_packet *packet,
> >struct flow_tnl *tnl,
> >         ip_dst = get_16aligned_be32(&ip->ip_dst);
> >
> >         tnl->ip_src = ip_src;
> >-        tnl->ip_dst = ip_dst;
> >         tnl->ip_tos = ip->ip_tos;
> >         tnl->ip_ttl = ip->ip_ttl;
> >+        set_ipv4_dst_tnl(tnl, ip_dst);
> >
> >         *hlen += IP_HEADER_LEN;
> >
> >     } else if (IP_VER(ip->ip_ihl_ver) == 6) {
> >
> >-        memcpy(tnl->ipv6_src.s6_addr, ip6->ip6_src.be16, sizeof
> >ip6->ip6_src);
> >-        memcpy(tnl->ipv6_dst.s6_addr, ip6->ip6_dst.be16, sizeof
> >ip6->ip6_dst);
> >+        memcpy(tnl->ipv6_src.s6_addr, ip6->ip6_src.s6_addr,
> >+               sizeof ip6->ip6_src);
> >         tnl->ip_tos = 0;
> >         tnl->ip_ttl = ip6->ip6_hlim;
> >-
> >+        set_ipv6_dst_tnl(tnl, *(struct in6_addr *)&ip6->ip6_dst);
> >         *hlen += IPV6_HEADER_LEN;
> >
> >     } else {
> >diff --git a/lib/odp-util.c b/lib/odp-util.c index f16e113..309bb9c
> >100644
> >--- a/lib/odp-util.c
> >+++ b/lib/odp-util.c
> >@@ -510,7 +510,7 @@ format_odp_tnl_push_header(struct ds *ds, struct
> >ovs_action_push_tnl *data)
> >                       gnh->oam ? "oam," : "",
> >                       gnh->critical ? "crit," : "",
> >                       ntohl(get_16aligned_be32(&gnh->vni)) >> 8);
> >-
> >+
> >         if (gnh->opt_len) {
> >             ds_put_cstr(ds, ",options(");
> >             format_geneve_opts(gnh->options, NULL, gnh->opt_len * 4,
> >@@ -1864,13 +1864,13 @@ odp_tun_key_from_attr__(const struct nlattr
> *attr,
> >             tun->ip_src = nl_attr_get_be32(a);
> >             break;
> >         case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
> >-            tun->ip_dst = nl_attr_get_be32(a);
> >+            set_ipv4_dst_tnl(tun, nl_attr_get_be32(a));
> >             break;
> >         case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
> >             tun->ipv6_src = nl_attr_get_in6_addr(a);
> >             break;
> >         case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
> >-            tun->ipv6_dst = nl_attr_get_in6_addr(a);
> >+            set_ipv6_dst_tnl(tun, nl_attr_get_in6_addr(a));
> >             break;
> >         case OVS_TUNNEL_KEY_ATTR_TOS:
> >             tun->ip_tos = nl_attr_get_u8(a); @@ -1961,13 +1961,13 @@
> >tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key,
> >     if (tun_key->ip_src) {
> >         nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
> >tun_key->ip_src);
> >     }
> >-    if (tun_key->ip_dst) {
> >+    if (tun_key->dl_type == htons(ETH_TYPE_IP)) {
> >         nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
> >tun_key->ip_dst);
> >     }
> >     if (ipv6_addr_is_set(&tun_key->ipv6_src)) {
> >         nl_msg_put_in6_addr(a, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
> >&tun_key->ipv6_src);
> >     }
> >-    if (ipv6_addr_is_set(&tun_key->ipv6_dst)) {
> >+    if (tun_key->dl_type == htons(ETH_TYPE_IPV6)) {
> >         nl_msg_put_in6_addr(a, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
> >&tun_key->ipv6_dst);
> >     }
> >     if (tun_key->ip_tos) {
> >diff --git a/lib/packets.c b/lib/packets.c index d82341d..4f00c0e
> >100644
> >--- a/lib/packets.c
> >+++ b/lib/packets.c
> >@@ -39,7 +39,12 @@ const struct in6_addr in6addr_all_hosts =
> >IN6ADDR_ALL_HOSTS_INIT;  struct in6_addr  flow_tnl_dst(const struct
> >flow_tnl *tnl)  {
> >-    return tnl->ip_dst ? in6_addr_mapped_ipv4(tnl->ip_dst) :
> >tnl->ipv6_dst;
> >+    if (tnl->dl_type == htons(ETH_TYPE_IP)) {
> >+        return in6_addr_mapped_ipv4(tnl->ip_dst);
> >+    } else if (tnl->dl_type == htons(ETH_TYPE_IPV6)) {
> >+        return tnl->ipv6_dst;
> >+    }
> >+    return in6addr_any;
> > }
> >
> > struct in6_addr
> >diff --git a/lib/packets.h b/lib/packets.h index 834e8a4..7e8dd91
> >100644
> >--- a/lib/packets.h
> >+++ b/lib/packets.h
> >@@ -35,8 +35,26 @@
> > struct dp_packet;
> > struct ds;
> >
> >+#define ETH_TYPE_IP            0x0800
> >+#define ETH_TYPE_ARP           0x0806
> >+#define ETH_TYPE_TEB           0x6558
> >+#define ETH_TYPE_VLAN_8021Q    0x8100
> >+#define ETH_TYPE_VLAN          ETH_TYPE_VLAN_8021Q
> >+#define ETH_TYPE_VLAN_8021AD   0x88a8
> >+#define ETH_TYPE_IPV6          0x86dd
> >+#define ETH_TYPE_LACP          0x8809
> >+#define ETH_TYPE_RARP          0x8035
> >+#define ETH_TYPE_MPLS          0x8847
> >+#define ETH_TYPE_MPLS_MCAST    0x8848
> >+
> > /* Tunnel information used in flow key and metadata. */ struct
> > flow_tnl {
> >+    /* The tunnel destination ip/ipv6 address are not initializing to
> >zero
> >+     * due to the performance constrains. the ethernet type field
> >'dl_type' is
> >+     * used to validate the destination addresses. 'dl_type' is set to
> >zero
> >+     * for a invalid flow_tnl entry.
> >+     */
> >+    ovs_be16 dl_type;
> >     ovs_be32 ip_dst;
> >     struct in6_addr ipv6_dst;
> >     ovs_be32 ip_src;
> >@@ -49,7 +67,7 @@ struct flow_tnl {
> >     ovs_be16 tp_dst;
> >     ovs_be16 gbp_id;
> >     uint8_t  gbp_flags;
> >-    uint8_t  pad1[5];        /* Pad to 64 bits. */
> >+    uint8_t  pad1[1];        /* Pad to 64 bits. */
> >     struct tun_metadata metadata;
> > };
> >
> >@@ -76,10 +94,36 @@ struct flow_tnl {
> >
> > static inline bool ipv6_addr_is_set(const struct in6_addr *addr);
> >
> >+static inline void
> >+set_ipv4_dst_tnl(struct flow_tnl *tnl, ovs_be32 ip_dst) {
> >+    if (tnl) {
> >+        tnl->ip_dst = ip_dst;
> >+        if (tnl->ip_dst) {
> >+            tnl->dl_type = htons(ETH_TYPE_IP);
> >+        } else {
> >+            tnl->dl_type = 0;
> >+        }
> >+    }
> >+}
> >+
> >+static inline void
> >+set_ipv6_dst_tnl(struct flow_tnl *tnl, struct in6_addr ipv6_dst) {
> >+    if (tnl) {
> >+        tnl->ipv6_dst = ipv6_dst;
> >+        if (ipv6_addr_is_set(&tnl->ipv6_dst)) {
> >+            tnl->dl_type = htons(ETH_TYPE_IPV6);
> >+        } else {
> >+            tnl->dl_type = 0;
> >+        }
> >+    }
> >+}
> >+
> > static inline bool
> > flow_tnl_dst_is_set(const struct flow_tnl *tnl)  {
> >-    return tnl->ip_dst || ipv6_addr_is_set(&tnl->ipv6_dst);
> >+    return (tnl->dl_type != 0);
> > }
> >
> > struct in6_addr flow_tnl_dst(const struct flow_tnl *tnl); @@ -90,8
> >+134,8 @@ static inline size_t  flow_tnl_size(const struct flow_tnl
> >*src)  {
> >     if (!flow_tnl_dst_is_set(src)) {
> >-        /* Covers ip_dst and ipv6_dst only. */
> >-        return offsetof(struct flow_tnl, ip_src);
> >+        /* Covers dl_type field only. */
> >+        return offsetof(struct flow_tnl, ip_dst);
> >     }
> >     if (src->flags & FLOW_TNL_F_UDPIF) {
> >         /* Datapath format, cover all options we have. */ @@ -154,11
> >+198,10 @@ static inline void  pkt_metadata_init(struct pkt_metadata
> >*md, odp_port_t port)  {
> >     /* It can be expensive to zero out all of the tunnel metadata.
> >However,
> >-     * we can just zero out ip_dst and the rest of the data will never be
> >+     * we can just zero out dl_type and the rest of the data will
> >+ never
> >be
> >      * looked at. */
> >     memset(md, 0, offsetof(struct pkt_metadata, in_port));
> >-    md->tunnel.ip_dst = 0;
> >-    md->tunnel.ipv6_dst = in6addr_any;
> >+    md->tunnel.dl_type = 0;
> >
> >     md->in_port.odp_port = port;
> > }
> >@@ -347,18 +390,6 @@ ovs_be32 set_mpls_lse_values(uint8_t ttl, uint8_t
> >tc, uint8_t bos,  #define ETH_ADDR_SCAN_ARGS(EA) \
> >     &(EA).ea[0], &(EA).ea[1], &(EA).ea[2], &(EA).ea[3], &(EA).ea[4],
> >&(EA).ea[5]
> >
> >-#define ETH_TYPE_IP            0x0800
> >-#define ETH_TYPE_ARP           0x0806
> >-#define ETH_TYPE_TEB           0x6558
> >-#define ETH_TYPE_VLAN_8021Q    0x8100
> >-#define ETH_TYPE_VLAN          ETH_TYPE_VLAN_8021Q
> >-#define ETH_TYPE_VLAN_8021AD   0x88a8
> >-#define ETH_TYPE_IPV6          0x86dd
> >-#define ETH_TYPE_LACP          0x8809
> >-#define ETH_TYPE_RARP          0x8035
> >-#define ETH_TYPE_MPLS          0x8847
> >-#define ETH_TYPE_MPLS_MCAST    0x8848
> >-
> > static inline bool eth_type_mpls(ovs_be16 eth_type)  {
> >     return eth_type == htons(ETH_TYPE_MPLS) || diff --git
> >a/ofproto/ofproto-dpif-ipfix.c b/ofproto/ofproto-dpif-ipfix.c index
> >a610c53..ca06583 100644
> >--- a/ofproto/ofproto-dpif-ipfix.c
> >+++ b/ofproto/ofproto-dpif-ipfix.c
> >@@ -1721,7 +1721,8 @@ dpif_ipfix_bridge_sample(struct dpif_ipfix *di,
> >const struct dp_packet *packet,
> >      * of matched packets. */
> >     packet_delta_count = UINT32_MAX / di->bridge_exporter.probability;
> >     if (di->bridge_exporter.options->enable_tunnel_sampling) {
> >-        if (output_odp_port == ODPP_NONE && flow->tunnel.ip_dst) {
> >+        if (output_odp_port == ODPP_NONE &&
> >+                flow->tunnel.dl_type == htons(ETH_TYPE_IP)) {
> >             /* Input tunnel. */
> >             tunnel_key = &flow->tunnel;
> >             tunnel_port = dpif_ipfix_find_port(di, input_odp_port);
> >diff --git a/ofproto/ofproto-dpif-rid.c b/ofproto/ofproto-dpif-rid.c
> >index d142933..735b5f3 100644
> >--- a/ofproto/ofproto-dpif-rid.c
> >+++ b/ofproto/ofproto-dpif-rid.c
> >@@ -297,8 +297,7 @@ uint32_t
> > recirc_alloc_id(struct ofproto_dpif *ofproto)  {
> >     struct flow_tnl tunnel;
> >-    tunnel.ip_dst = htonl(0);
> >-    tunnel.ipv6_dst = in6addr_any;
> >+    tunnel.dl_type = 0;
> >     struct recirc_state state = {
> >         .table_id = TBL_INTERNAL,
> >         .ofproto = ofproto,
> >diff --git a/ofproto/ofproto-dpif-sflow.c
> >b/ofproto/ofproto-dpif-sflow.c index f11699c..ea21a1a 100644
> >--- a/ofproto/ofproto-dpif-sflow.c
> >+++ b/ofproto/ofproto-dpif-sflow.c
> >@@ -899,7 +899,7 @@ sflow_read_tnl_push_action(const struct nlattr
> *attr,
> >     /* IPv4 */
> >     /* Cannot assume alignment so just use memcpy. */
> >     sflow_actions->tunnel.ip_src = get_16aligned_be32(&ip->ip_src);
> >-    sflow_actions->tunnel.ip_dst = get_16aligned_be32(&ip->ip_dst);
> >+    set_ipv4_dst_tnl(&sflow_actions->tunnel,
> >get_16aligned_be32(&ip->ip_dst));
> >     sflow_actions->tunnel.ip_tos = ip->ip_tos;
> >     sflow_actions->tunnel.ip_ttl = ip->ip_ttl;
> >     /* The tnl_push action can supply the ip_protocol too. */ @@
> >-991,7 +991,7 @@ sflow_read_set_action(const struct nlattr *attr,
> >                 sflow_actions->tunnel.ip_src = key->ipv4_src;
> >             }
> >             if (key->ipv4_dst) {
> >-                sflow_actions->tunnel.ip_dst = key->ipv4_dst;
> >+                set_ipv4_dst_tnl(&sflow_actions->tunnel,
> >+ key->ipv4_dst);
> >             }
> >             if (key->ipv4_proto) {
> >                 sflow_actions->tunnel_ipproto = key->ipv4_proto; @@
> >-1287,7 +1287,7 @@ dpif_sflow_received(struct dpif_sflow *ds, const
> >struct dp_packet *packet,
> >     fs.output = cookie->sflow.output;
> >
> >     /* Input tunnel. */
> >-    if (flow->tunnel.ip_dst) {
> >+    if (flow->tunnel.dl_type == htons(ETH_TYPE_IP)) {
> > 	memset(&tnlInElem, 0, sizeof(tnlInElem));
> > 	tnlInElem.tag = SFLFLOW_EX_IPV4_TUNNEL_INGRESS;
> > 	tnlInProto = dpif_sflow_tunnel_proto(in_dsp->tunnel_type);
> >diff --git a/ofproto/tunnel.c b/ofproto/tunnel.c index 5cf6c75..b2bd254
> >100644
> >--- a/ofproto/tunnel.c
> >+++ b/ofproto/tunnel.c
> >@@ -362,12 +362,12 @@ tnl_wc_init(struct flow *flow, struct
> >flow_wildcards *wc)  {
> >     if (tnl_port_should_receive(flow)) {
> >         wc->masks.tunnel.tun_id = OVS_BE64_MAX;
> >-        if (flow->tunnel.ip_dst) {
> >+        if (flow->tunnel.dl_type == htons(ETH_TYPE_IP)) {
> >             wc->masks.tunnel.ip_src = OVS_BE32_MAX;
> >-            wc->masks.tunnel.ip_dst = OVS_BE32_MAX;
> >-        } else {
> >+            set_ipv4_dst_tnl(&wc->masks.tunnel, OVS_BE32_MAX);
> >+        } else if (flow->tunnel.dl_type == htons(ETH_TYPE_IPV6)) {
> >             wc->masks.tunnel.ipv6_src = in6addr_exact;
> >-            wc->masks.tunnel.ipv6_dst = in6addr_exact;
> >+            set_ipv6_dst_tnl(&wc->masks.tunnel, in6addr_exact);
> >         }
> >         wc->masks.tunnel.flags = (FLOW_TNL_F_DONT_FRAGMENT |
> >                                   FLOW_TNL_F_CSUM | @@ -424,7 +424,9
> >@@ tnl_port_send(const struct ofport_dpif *ofport, struct flow *flow,
> >     if (!cfg->ip_dst_flow) {
> >         flow->tunnel.ip_dst =
> >in6_addr_get_mapped_ipv4(&tnl_port->match.ipv6_dst);
> >         if (!flow->tunnel.ip_dst) {
> >-            flow->tunnel.ipv6_dst = tnl_port->match.ipv6_dst;
> >+            set_ipv6_dst_tnl(&flow->tunnel, tnl_port->match.ipv6_dst);
> >+        } else {
> >+            set_ipv4_dst_tnl(&flow->tunnel, flow->tunnel.ip_dst);
> >         }
> >     }
> >     flow->pkt_mark = tnl_port->match.pkt_mark;
> >--
> >1.9.1
> >
> >_______________________________________________
> >dev mailing list
> >dev@openvswitch.org
> >http://openvswitch.org/mailman/listinfo/dev
Daniele Di Proietto Feb. 2, 2016, 3:34 a.m. UTC | #4
On 01/02/2016 12:30, "Chandran, Sugesh" <sugesh.chandran@intel.com> wrote:

>Hi Daniele,
>
>Thank you for sending out the patch.
>The proposal looks fine for me and also I verified that the performance
>is restored with the patch.

Thanks!

>
>One suggestion is , Do we really have to pass two parameters to the
>"dp_netdev_input" to init and validate the metadata.?
>Can we use  the pointer to the port_id/dp_netdev_port. This way there
>wont be a dependency between arguments to a function.
>What do you think?? Does it make more complicated??

I've sent a new version with some suggestions from Andy that
avoids passing two parameters when calling the function
(or at least it hides it).

http://openvswitch.org/pipermail/dev/2016-February/065441.html

What do you think?

Thanks,

Daniele

>
>
>
>Regards
>_Sugesh
>
>
>> -----Original Message-----
>> From: Daniele Di Proietto [mailto:diproiettod@vmware.com]
>> Sent: Friday, January 29, 2016 2:01 AM
>> To: Chandran, Sugesh <sugesh.chandran@intel.com>; jbenc@redhat.com;
>> cascardo@redhat.com
>> Cc: dev@openvswitch.org
>> Subject: Re: [ovs-dev] [PATCH v3] ipv6 tunneling: Fix for performance
>>drop
>> introduced by ipv6 tunnel support.
>> 
>> Hi Sugesh,
>> 
>> Sorry for the delay, I've been trying to test different approaches.
>> 
>> I tested the patch and I see that the performance is up again. I'm not
>>sure
>> whether introducing the 'dl_type' flag is going to be very easy to
>>maintain,
>> since we have to remember to update when we touch the structure.
>> Unfortunately that structure is used everywhere in OVS, not just in the
>>fast
>> path.
>> 
>> I have prepared a workaround to speed up the metadata initialization
>>here:
>> 
>> http://openvswitch.org/pipermail/dev/2016-January/065263.html
>> 
>> In my setup it restores the throughput to the same numbers. It
>>complicates
>> the code slightly, but the modifications are local to dpif-netdev.
>> 
>> What do you think?
>> 
>> Thanks,
>> 
>> Daniele
>> 
>> On 19/01/2016 09:23, "Sugesh Chandran" <sugesh.chandran@intel.com>
>> wrote:
>> 
>> >Adding a new field called dl_type in flow tunnel structure to verify
>> >the validity of tunnel metadata. This field avoids the need of
>> >resetting and validating the entire ipv4/ipv6 tunnel destination
>> >address which caused a serious performance drop.
>> >
>> >Fixes: 3ae91c019019 ("tunneling: add IPv6 support to
>> >netdev_tunnel_config")
>> >Signed-off-by: Sugesh Chandran <sugesh.chandran@intel.com>
>> >---
>> > lib/flow.c                   |  7 ++---
>> > lib/match.c                  | 12 ++++----
>> > lib/meta-flow.c              | 10 +++++--
>> > lib/netdev-vport.c           | 10 +++----
>> > lib/odp-util.c               | 10 +++----
>> > lib/packets.c                |  7 ++++-
>> > lib/packets.h                | 69
>> >++++++++++++++++++++++++++++++++------------
>> > ofproto/ofproto-dpif-ipfix.c |  3 +-
>> > ofproto/ofproto-dpif-rid.c   |  3 +-
>> > ofproto/ofproto-dpif-sflow.c |  6 ++--
>> > ofproto/tunnel.c             | 12 ++++----
>> > 11 files changed, 96 insertions(+), 53 deletions(-)
>> >
>> >diff --git a/lib/flow.c b/lib/flow.c
>> >index 5668d0c..33aadd1 100644
>> >--- a/lib/flow.c
>> >+++ b/lib/flow.c
>> >@@ -818,15 +818,14 @@ flow_get_metadata(const struct flow *flow,
>> struct
>> >match *flow_metadata)
>> >     if (flow->tunnel.ip_src) {
>> >         match_set_tun_src(flow_metadata, flow->tunnel.ip_src);
>> >     }
>> >-    if (flow->tunnel.ip_dst) {
>> >+    if (flow->tunnel.dl_type == htons(ETH_TYPE_IP)) {
>> >         match_set_tun_dst(flow_metadata, flow->tunnel.ip_dst);
>> >+    } else if (flow->tunnel.dl_type == htons(ETH_TYPE_IPV6)) {
>> >+        match_set_tun_ipv6_dst(flow_metadata, &flow->tunnel.ipv6_dst);
>> >     }
>> >     if (ipv6_addr_is_set(&flow->tunnel.ipv6_src)) {
>> >         match_set_tun_ipv6_src(flow_metadata, &flow->tunnel.ipv6_src);
>> >     }
>> >-    if (ipv6_addr_is_set(&flow->tunnel.ipv6_dst)) {
>> >-        match_set_tun_ipv6_dst(flow_metadata, &flow->tunnel.ipv6_dst);
>> >-    }
>> >     if (flow->tunnel.gbp_id != htons(0)) {
>> >         match_set_tun_gbp_id(flow_metadata, flow->tunnel.gbp_id);
>> >     }
>> >diff --git a/lib/match.c b/lib/match.c
>> >index 95d34bc..97a623e 100644
>> >--- a/lib/match.c
>> >+++ b/lib/match.c
>> >@@ -186,8 +186,8 @@ match_set_tun_dst(struct match *match, ovs_be32
>> >dst)  void  match_set_tun_dst_masked(struct match *match, ovs_be32 dst,
>> >ovs_be32
>> >mask)
>> > {
>> >-    match->wc.masks.tunnel.ip_dst = mask;
>> >-    match->flow.tunnel.ip_dst = dst & mask;
>> >+    set_ipv4_dst_tnl(&match->wc.masks.tunnel, mask);
>> >+    set_ipv4_dst_tnl(&match->flow.tunnel, dst & mask);
>> > }
>> >
>> > void
>> >@@ -208,16 +208,16 @@ match_set_tun_ipv6_src_masked(struct match
>> >*match, const struct in6_addr *src,  void
>> >match_set_tun_ipv6_dst(struct match *match, const struct in6_addr *dst)
>> >{
>> >-    match->flow.tunnel.ipv6_dst = *dst;
>> >-    match->wc.masks.tunnel.ipv6_dst = in6addr_exact;
>> >+    set_ipv6_dst_tnl(&match->flow.tunnel, *dst);
>> >+    set_ipv6_dst_tnl(&match->wc.masks.tunnel, in6addr_exact);
>> > }
>> >
>> > void
>> > match_set_tun_ipv6_dst_masked(struct match *match, const struct
>> >in6_addr *dst,
>> >                               const struct in6_addr *mask)  {
>> >-    match->flow.tunnel.ipv6_dst = ipv6_addr_bitand(dst, mask);
>> >-    match->wc.masks.tunnel.ipv6_dst = *mask;
>> >+    set_ipv6_dst_tnl(&match->flow.tunnel, ipv6_addr_bitand(dst,
>>mask));
>> >+    set_ipv6_dst_tnl(&match->wc.masks.tunnel, *mask);
>> > }
>> >
>> > void
>> >diff --git a/lib/meta-flow.c b/lib/meta-flow.c index 6bd0b99..9d939be
>> >100644
>> >--- a/lib/meta-flow.c
>> >+++ b/lib/meta-flow.c
>> >@@ -1180,13 +1180,13 @@ mf_set_flow_value(const struct mf_field *mf,
>> >         flow->tunnel.ip_src = value->be32;
>> >         break;
>> >     case MFF_TUN_DST:
>> >-        flow->tunnel.ip_dst = value->be32;
>> >+        set_ipv4_dst_tnl(&flow->tunnel, value->be32);
>> >         break;
>> >     case MFF_TUN_IPV6_SRC:
>> >         flow->tunnel.ipv6_src = value->ipv6;
>> >         break;
>> >     case MFF_TUN_IPV6_DST:
>> >-        flow->tunnel.ipv6_dst = value->ipv6;
>> >+        set_ipv6_dst_tnl(&flow->tunnel, value->ipv6);
>> >         break;
>> >     case MFF_TUN_FLAGS:
>> >         flow->tunnel.flags = (flow->tunnel.flags &
>> > ~FLOW_TNL_PUB_F_MASK)
>> >|
>> >@@ -1503,6 +1503,12 @@ mf_set_wild(const struct mf_field *mf, struct
>> >match *match, char **err_str)
>> >                sizeof match->wc.masks.tunnel.ipv6_dst);
>> >         memset(&match->flow.tunnel.ipv6_dst, 0,
>> >                sizeof match->flow.tunnel.ipv6_dst);
>> >+        /* What if flow have a valid ipv4 tunnel data??
>> >+         * Reset the dl_type only if thats not the case.
>> >+         */
>> >+        match->wc.masks.tunnel.dl_type =
>> >+ (match->wc.masks.tunnel.dl_type
>> >==
>> >+                                          htons(ETH_TYPE_IP)) ?
>> >+
>> >+ match->wc.masks.tunnel.dl_type
>> >: 0;
>> >         break;
>> >     case MFF_TUN_FLAGS:
>> >         match_set_tun_flags_masked(match, 0, 0); diff --git
>> >a/lib/netdev-vport.c b/lib/netdev-vport.c index 88f5022..94ddf59 100644
>> >--- a/lib/netdev-vport.c
>> >+++ b/lib/netdev-vport.c
>> >@@ -877,7 +877,7 @@ ip_extract_tnl_md(struct dp_packet *packet, struct
>> >flow_tnl *tnl,  {
>> >     void *nh;
>> >     struct ip_header *ip;
>> >-    struct ovs_16aligned_ip6_hdr *ip6;
>> >+    struct ip6_hdr *ip6;
>> >     void *l4;
>> >     int l3_size;
>> >
>> >@@ -919,19 +919,19 @@ ip_extract_tnl_md(struct dp_packet *packet,
>> >struct flow_tnl *tnl,
>> >         ip_dst = get_16aligned_be32(&ip->ip_dst);
>> >
>> >         tnl->ip_src = ip_src;
>> >-        tnl->ip_dst = ip_dst;
>> >         tnl->ip_tos = ip->ip_tos;
>> >         tnl->ip_ttl = ip->ip_ttl;
>> >+        set_ipv4_dst_tnl(tnl, ip_dst);
>> >
>> >         *hlen += IP_HEADER_LEN;
>> >
>> >     } else if (IP_VER(ip->ip_ihl_ver) == 6) {
>> >
>> >-        memcpy(tnl->ipv6_src.s6_addr, ip6->ip6_src.be16, sizeof
>> >ip6->ip6_src);
>> >-        memcpy(tnl->ipv6_dst.s6_addr, ip6->ip6_dst.be16, sizeof
>> >ip6->ip6_dst);
>> >+        memcpy(tnl->ipv6_src.s6_addr, ip6->ip6_src.s6_addr,
>> >+               sizeof ip6->ip6_src);
>> >         tnl->ip_tos = 0;
>> >         tnl->ip_ttl = ip6->ip6_hlim;
>> >-
>> >+        set_ipv6_dst_tnl(tnl, *(struct in6_addr *)&ip6->ip6_dst);
>> >         *hlen += IPV6_HEADER_LEN;
>> >
>> >     } else {
>> >diff --git a/lib/odp-util.c b/lib/odp-util.c index f16e113..309bb9c
>> >100644
>> >--- a/lib/odp-util.c
>> >+++ b/lib/odp-util.c
>> >@@ -510,7 +510,7 @@ format_odp_tnl_push_header(struct ds *ds, struct
>> >ovs_action_push_tnl *data)
>> >                       gnh->oam ? "oam," : "",
>> >                       gnh->critical ? "crit," : "",
>> >                       ntohl(get_16aligned_be32(&gnh->vni)) >> 8);
>> >-
>> >+
>> >         if (gnh->opt_len) {
>> >             ds_put_cstr(ds, ",options(");
>> >             format_geneve_opts(gnh->options, NULL, gnh->opt_len * 4,
>> >@@ -1864,13 +1864,13 @@ odp_tun_key_from_attr__(const struct nlattr
>> *attr,
>> >             tun->ip_src = nl_attr_get_be32(a);
>> >             break;
>> >         case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
>> >-            tun->ip_dst = nl_attr_get_be32(a);
>> >+            set_ipv4_dst_tnl(tun, nl_attr_get_be32(a));
>> >             break;
>> >         case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
>> >             tun->ipv6_src = nl_attr_get_in6_addr(a);
>> >             break;
>> >         case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
>> >-            tun->ipv6_dst = nl_attr_get_in6_addr(a);
>> >+            set_ipv6_dst_tnl(tun, nl_attr_get_in6_addr(a));
>> >             break;
>> >         case OVS_TUNNEL_KEY_ATTR_TOS:
>> >             tun->ip_tos = nl_attr_get_u8(a); @@ -1961,13 +1961,13 @@
>> >tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key,
>> >     if (tun_key->ip_src) {
>> >         nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
>> >tun_key->ip_src);
>> >     }
>> >-    if (tun_key->ip_dst) {
>> >+    if (tun_key->dl_type == htons(ETH_TYPE_IP)) {
>> >         nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
>> >tun_key->ip_dst);
>> >     }
>> >     if (ipv6_addr_is_set(&tun_key->ipv6_src)) {
>> >         nl_msg_put_in6_addr(a, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
>> >&tun_key->ipv6_src);
>> >     }
>> >-    if (ipv6_addr_is_set(&tun_key->ipv6_dst)) {
>> >+    if (tun_key->dl_type == htons(ETH_TYPE_IPV6)) {
>> >         nl_msg_put_in6_addr(a, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
>> >&tun_key->ipv6_dst);
>> >     }
>> >     if (tun_key->ip_tos) {
>> >diff --git a/lib/packets.c b/lib/packets.c index d82341d..4f00c0e
>> >100644
>> >--- a/lib/packets.c
>> >+++ b/lib/packets.c
>> >@@ -39,7 +39,12 @@ const struct in6_addr in6addr_all_hosts =
>> >IN6ADDR_ALL_HOSTS_INIT;  struct in6_addr  flow_tnl_dst(const struct
>> >flow_tnl *tnl)  {
>> >-    return tnl->ip_dst ? in6_addr_mapped_ipv4(tnl->ip_dst) :
>> >tnl->ipv6_dst;
>> >+    if (tnl->dl_type == htons(ETH_TYPE_IP)) {
>> >+        return in6_addr_mapped_ipv4(tnl->ip_dst);
>> >+    } else if (tnl->dl_type == htons(ETH_TYPE_IPV6)) {
>> >+        return tnl->ipv6_dst;
>> >+    }
>> >+    return in6addr_any;
>> > }
>> >
>> > struct in6_addr
>> >diff --git a/lib/packets.h b/lib/packets.h index 834e8a4..7e8dd91
>> >100644
>> >--- a/lib/packets.h
>> >+++ b/lib/packets.h
>> >@@ -35,8 +35,26 @@
>> > struct dp_packet;
>> > struct ds;
>> >
>> >+#define ETH_TYPE_IP            0x0800
>> >+#define ETH_TYPE_ARP           0x0806
>> >+#define ETH_TYPE_TEB           0x6558
>> >+#define ETH_TYPE_VLAN_8021Q    0x8100
>> >+#define ETH_TYPE_VLAN          ETH_TYPE_VLAN_8021Q
>> >+#define ETH_TYPE_VLAN_8021AD   0x88a8
>> >+#define ETH_TYPE_IPV6          0x86dd
>> >+#define ETH_TYPE_LACP          0x8809
>> >+#define ETH_TYPE_RARP          0x8035
>> >+#define ETH_TYPE_MPLS          0x8847
>> >+#define ETH_TYPE_MPLS_MCAST    0x8848
>> >+
>> > /* Tunnel information used in flow key and metadata. */ struct
>> > flow_tnl {
>> >+    /* The tunnel destination ip/ipv6 address are not initializing to
>> >zero
>> >+     * due to the performance constrains. the ethernet type field
>> >'dl_type' is
>> >+     * used to validate the destination addresses. 'dl_type' is set to
>> >zero
>> >+     * for a invalid flow_tnl entry.
>> >+     */
>> >+    ovs_be16 dl_type;
>> >     ovs_be32 ip_dst;
>> >     struct in6_addr ipv6_dst;
>> >     ovs_be32 ip_src;
>> >@@ -49,7 +67,7 @@ struct flow_tnl {
>> >     ovs_be16 tp_dst;
>> >     ovs_be16 gbp_id;
>> >     uint8_t  gbp_flags;
>> >-    uint8_t  pad1[5];        /* Pad to 64 bits. */
>> >+    uint8_t  pad1[1];        /* Pad to 64 bits. */
>> >     struct tun_metadata metadata;
>> > };
>> >
>> >@@ -76,10 +94,36 @@ struct flow_tnl {
>> >
>> > static inline bool ipv6_addr_is_set(const struct in6_addr *addr);
>> >
>> >+static inline void
>> >+set_ipv4_dst_tnl(struct flow_tnl *tnl, ovs_be32 ip_dst) {
>> >+    if (tnl) {
>> >+        tnl->ip_dst = ip_dst;
>> >+        if (tnl->ip_dst) {
>> >+            tnl->dl_type = htons(ETH_TYPE_IP);
>> >+        } else {
>> >+            tnl->dl_type = 0;
>> >+        }
>> >+    }
>> >+}
>> >+
>> >+static inline void
>> >+set_ipv6_dst_tnl(struct flow_tnl *tnl, struct in6_addr ipv6_dst) {
>> >+    if (tnl) {
>> >+        tnl->ipv6_dst = ipv6_dst;
>> >+        if (ipv6_addr_is_set(&tnl->ipv6_dst)) {
>> >+            tnl->dl_type = htons(ETH_TYPE_IPV6);
>> >+        } else {
>> >+            tnl->dl_type = 0;
>> >+        }
>> >+    }
>> >+}
>> >+
>> > static inline bool
>> > flow_tnl_dst_is_set(const struct flow_tnl *tnl)  {
>> >-    return tnl->ip_dst || ipv6_addr_is_set(&tnl->ipv6_dst);
>> >+    return (tnl->dl_type != 0);
>> > }
>> >
>> > struct in6_addr flow_tnl_dst(const struct flow_tnl *tnl); @@ -90,8
>> >+134,8 @@ static inline size_t  flow_tnl_size(const struct flow_tnl
>> >*src)  {
>> >     if (!flow_tnl_dst_is_set(src)) {
>> >-        /* Covers ip_dst and ipv6_dst only. */
>> >-        return offsetof(struct flow_tnl, ip_src);
>> >+        /* Covers dl_type field only. */
>> >+        return offsetof(struct flow_tnl, ip_dst);
>> >     }
>> >     if (src->flags & FLOW_TNL_F_UDPIF) {
>> >         /* Datapath format, cover all options we have. */ @@ -154,11
>> >+198,10 @@ static inline void  pkt_metadata_init(struct pkt_metadata
>> >*md, odp_port_t port)  {
>> >     /* It can be expensive to zero out all of the tunnel metadata.
>> >However,
>> >-     * we can just zero out ip_dst and the rest of the data will
>>never be
>> >+     * we can just zero out dl_type and the rest of the data will
>> >+ never
>> >be
>> >      * looked at. */
>> >     memset(md, 0, offsetof(struct pkt_metadata, in_port));
>> >-    md->tunnel.ip_dst = 0;
>> >-    md->tunnel.ipv6_dst = in6addr_any;
>> >+    md->tunnel.dl_type = 0;
>> >
>> >     md->in_port.odp_port = port;
>> > }
>> >@@ -347,18 +390,6 @@ ovs_be32 set_mpls_lse_values(uint8_t ttl, uint8_t
>> >tc, uint8_t bos,  #define ETH_ADDR_SCAN_ARGS(EA) \
>> >     &(EA).ea[0], &(EA).ea[1], &(EA).ea[2], &(EA).ea[3], &(EA).ea[4],
>> >&(EA).ea[5]
>> >
>> >-#define ETH_TYPE_IP            0x0800
>> >-#define ETH_TYPE_ARP           0x0806
>> >-#define ETH_TYPE_TEB           0x6558
>> >-#define ETH_TYPE_VLAN_8021Q    0x8100
>> >-#define ETH_TYPE_VLAN          ETH_TYPE_VLAN_8021Q
>> >-#define ETH_TYPE_VLAN_8021AD   0x88a8
>> >-#define ETH_TYPE_IPV6          0x86dd
>> >-#define ETH_TYPE_LACP          0x8809
>> >-#define ETH_TYPE_RARP          0x8035
>> >-#define ETH_TYPE_MPLS          0x8847
>> >-#define ETH_TYPE_MPLS_MCAST    0x8848
>> >-
>> > static inline bool eth_type_mpls(ovs_be16 eth_type)  {
>> >     return eth_type == htons(ETH_TYPE_MPLS) || diff --git
>> >a/ofproto/ofproto-dpif-ipfix.c b/ofproto/ofproto-dpif-ipfix.c index
>> >a610c53..ca06583 100644
>> >--- a/ofproto/ofproto-dpif-ipfix.c
>> >+++ b/ofproto/ofproto-dpif-ipfix.c
>> >@@ -1721,7 +1721,8 @@ dpif_ipfix_bridge_sample(struct dpif_ipfix *di,
>> >const struct dp_packet *packet,
>> >      * of matched packets. */
>> >     packet_delta_count = UINT32_MAX / di->bridge_exporter.probability;
>> >     if (di->bridge_exporter.options->enable_tunnel_sampling) {
>> >-        if (output_odp_port == ODPP_NONE && flow->tunnel.ip_dst) {
>> >+        if (output_odp_port == ODPP_NONE &&
>> >+                flow->tunnel.dl_type == htons(ETH_TYPE_IP)) {
>> >             /* Input tunnel. */
>> >             tunnel_key = &flow->tunnel;
>> >             tunnel_port = dpif_ipfix_find_port(di, input_odp_port);
>> >diff --git a/ofproto/ofproto-dpif-rid.c b/ofproto/ofproto-dpif-rid.c
>> >index d142933..735b5f3 100644
>> >--- a/ofproto/ofproto-dpif-rid.c
>> >+++ b/ofproto/ofproto-dpif-rid.c
>> >@@ -297,8 +297,7 @@ uint32_t
>> > recirc_alloc_id(struct ofproto_dpif *ofproto)  {
>> >     struct flow_tnl tunnel;
>> >-    tunnel.ip_dst = htonl(0);
>> >-    tunnel.ipv6_dst = in6addr_any;
>> >+    tunnel.dl_type = 0;
>> >     struct recirc_state state = {
>> >         .table_id = TBL_INTERNAL,
>> >         .ofproto = ofproto,
>> >diff --git a/ofproto/ofproto-dpif-sflow.c
>> >b/ofproto/ofproto-dpif-sflow.c index f11699c..ea21a1a 100644
>> >--- a/ofproto/ofproto-dpif-sflow.c
>> >+++ b/ofproto/ofproto-dpif-sflow.c
>> >@@ -899,7 +899,7 @@ sflow_read_tnl_push_action(const struct nlattr
>> *attr,
>> >     /* IPv4 */
>> >     /* Cannot assume alignment so just use memcpy. */
>> >     sflow_actions->tunnel.ip_src = get_16aligned_be32(&ip->ip_src);
>> >-    sflow_actions->tunnel.ip_dst = get_16aligned_be32(&ip->ip_dst);
>> >+    set_ipv4_dst_tnl(&sflow_actions->tunnel,
>> >get_16aligned_be32(&ip->ip_dst));
>> >     sflow_actions->tunnel.ip_tos = ip->ip_tos;
>> >     sflow_actions->tunnel.ip_ttl = ip->ip_ttl;
>> >     /* The tnl_push action can supply the ip_protocol too. */ @@
>> >-991,7 +991,7 @@ sflow_read_set_action(const struct nlattr *attr,
>> >                 sflow_actions->tunnel.ip_src = key->ipv4_src;
>> >             }
>> >             if (key->ipv4_dst) {
>> >-                sflow_actions->tunnel.ip_dst = key->ipv4_dst;
>> >+                set_ipv4_dst_tnl(&sflow_actions->tunnel,
>> >+ key->ipv4_dst);
>> >             }
>> >             if (key->ipv4_proto) {
>> >                 sflow_actions->tunnel_ipproto = key->ipv4_proto; @@
>> >-1287,7 +1287,7 @@ dpif_sflow_received(struct dpif_sflow *ds, const
>> >struct dp_packet *packet,
>> >     fs.output = cookie->sflow.output;
>> >
>> >     /* Input tunnel. */
>> >-    if (flow->tunnel.ip_dst) {
>> >+    if (flow->tunnel.dl_type == htons(ETH_TYPE_IP)) {
>> > 	memset(&tnlInElem, 0, sizeof(tnlInElem));
>> > 	tnlInElem.tag = SFLFLOW_EX_IPV4_TUNNEL_INGRESS;
>> > 	tnlInProto = dpif_sflow_tunnel_proto(in_dsp->tunnel_type);
>> >diff --git a/ofproto/tunnel.c b/ofproto/tunnel.c index 5cf6c75..b2bd254
>> >100644
>> >--- a/ofproto/tunnel.c
>> >+++ b/ofproto/tunnel.c
>> >@@ -362,12 +362,12 @@ tnl_wc_init(struct flow *flow, struct
>> >flow_wildcards *wc)  {
>> >     if (tnl_port_should_receive(flow)) {
>> >         wc->masks.tunnel.tun_id = OVS_BE64_MAX;
>> >-        if (flow->tunnel.ip_dst) {
>> >+        if (flow->tunnel.dl_type == htons(ETH_TYPE_IP)) {
>> >             wc->masks.tunnel.ip_src = OVS_BE32_MAX;
>> >-            wc->masks.tunnel.ip_dst = OVS_BE32_MAX;
>> >-        } else {
>> >+            set_ipv4_dst_tnl(&wc->masks.tunnel, OVS_BE32_MAX);
>> >+        } else if (flow->tunnel.dl_type == htons(ETH_TYPE_IPV6)) {
>> >             wc->masks.tunnel.ipv6_src = in6addr_exact;
>> >-            wc->masks.tunnel.ipv6_dst = in6addr_exact;
>> >+            set_ipv6_dst_tnl(&wc->masks.tunnel, in6addr_exact);
>> >         }
>> >         wc->masks.tunnel.flags = (FLOW_TNL_F_DONT_FRAGMENT |
>> >                                   FLOW_TNL_F_CSUM | @@ -424,7 +424,9
>> >@@ tnl_port_send(const struct ofport_dpif *ofport, struct flow *flow,
>> >     if (!cfg->ip_dst_flow) {
>> >         flow->tunnel.ip_dst =
>> >in6_addr_get_mapped_ipv4(&tnl_port->match.ipv6_dst);
>> >         if (!flow->tunnel.ip_dst) {
>> >-            flow->tunnel.ipv6_dst = tnl_port->match.ipv6_dst;
>> >+            set_ipv6_dst_tnl(&flow->tunnel, tnl_port->match.ipv6_dst);
>> >+        } else {
>> >+            set_ipv4_dst_tnl(&flow->tunnel, flow->tunnel.ip_dst);
>> >         }
>> >     }
>> >     flow->pkt_mark = tnl_port->match.pkt_mark;
>> >--
>> >1.9.1
>> >
>> >_______________________________________________
>> >dev mailing list
>> >dev@openvswitch.org
>> >http://openvswitch.org/mailman/listinfo/dev
>
diff mbox

Patch

diff --git a/lib/flow.c b/lib/flow.c
index 5668d0c..33aadd1 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -818,15 +818,14 @@  flow_get_metadata(const struct flow *flow, struct match *flow_metadata)
     if (flow->tunnel.ip_src) {
         match_set_tun_src(flow_metadata, flow->tunnel.ip_src);
     }
-    if (flow->tunnel.ip_dst) {
+    if (flow->tunnel.dl_type == htons(ETH_TYPE_IP)) {
         match_set_tun_dst(flow_metadata, flow->tunnel.ip_dst);
+    } else if (flow->tunnel.dl_type == htons(ETH_TYPE_IPV6)) {
+        match_set_tun_ipv6_dst(flow_metadata, &flow->tunnel.ipv6_dst);
     }
     if (ipv6_addr_is_set(&flow->tunnel.ipv6_src)) {
         match_set_tun_ipv6_src(flow_metadata, &flow->tunnel.ipv6_src);
     }
-    if (ipv6_addr_is_set(&flow->tunnel.ipv6_dst)) {
-        match_set_tun_ipv6_dst(flow_metadata, &flow->tunnel.ipv6_dst);
-    }
     if (flow->tunnel.gbp_id != htons(0)) {
         match_set_tun_gbp_id(flow_metadata, flow->tunnel.gbp_id);
     }
diff --git a/lib/match.c b/lib/match.c
index 95d34bc..97a623e 100644
--- a/lib/match.c
+++ b/lib/match.c
@@ -186,8 +186,8 @@  match_set_tun_dst(struct match *match, ovs_be32 dst)
 void
 match_set_tun_dst_masked(struct match *match, ovs_be32 dst, ovs_be32 mask)
 {
-    match->wc.masks.tunnel.ip_dst = mask;
-    match->flow.tunnel.ip_dst = dst & mask;
+    set_ipv4_dst_tnl(&match->wc.masks.tunnel, mask);
+    set_ipv4_dst_tnl(&match->flow.tunnel, dst & mask);
 }
 
 void
@@ -208,16 +208,16 @@  match_set_tun_ipv6_src_masked(struct match *match, const struct in6_addr *src,
 void
 match_set_tun_ipv6_dst(struct match *match, const struct in6_addr *dst)
 {
-    match->flow.tunnel.ipv6_dst = *dst;
-    match->wc.masks.tunnel.ipv6_dst = in6addr_exact;
+    set_ipv6_dst_tnl(&match->flow.tunnel, *dst);
+    set_ipv6_dst_tnl(&match->wc.masks.tunnel, in6addr_exact);
 }
 
 void
 match_set_tun_ipv6_dst_masked(struct match *match, const struct in6_addr *dst,
                               const struct in6_addr *mask)
 {
-    match->flow.tunnel.ipv6_dst = ipv6_addr_bitand(dst, mask);
-    match->wc.masks.tunnel.ipv6_dst = *mask;
+    set_ipv6_dst_tnl(&match->flow.tunnel, ipv6_addr_bitand(dst, mask));
+    set_ipv6_dst_tnl(&match->wc.masks.tunnel, *mask);
 }
 
 void
diff --git a/lib/meta-flow.c b/lib/meta-flow.c
index 6bd0b99..9d939be 100644
--- a/lib/meta-flow.c
+++ b/lib/meta-flow.c
@@ -1180,13 +1180,13 @@  mf_set_flow_value(const struct mf_field *mf,
         flow->tunnel.ip_src = value->be32;
         break;
     case MFF_TUN_DST:
-        flow->tunnel.ip_dst = value->be32;
+        set_ipv4_dst_tnl(&flow->tunnel, value->be32);
         break;
     case MFF_TUN_IPV6_SRC:
         flow->tunnel.ipv6_src = value->ipv6;
         break;
     case MFF_TUN_IPV6_DST:
-        flow->tunnel.ipv6_dst = value->ipv6;
+        set_ipv6_dst_tnl(&flow->tunnel, value->ipv6);
         break;
     case MFF_TUN_FLAGS:
         flow->tunnel.flags = (flow->tunnel.flags & ~FLOW_TNL_PUB_F_MASK) |
@@ -1503,6 +1503,12 @@  mf_set_wild(const struct mf_field *mf, struct match *match, char **err_str)
                sizeof match->wc.masks.tunnel.ipv6_dst);
         memset(&match->flow.tunnel.ipv6_dst, 0,
                sizeof match->flow.tunnel.ipv6_dst);
+        /* What if flow have a valid ipv4 tunnel data??
+         * Reset the dl_type only if thats not the case.
+         */
+        match->wc.masks.tunnel.dl_type = (match->wc.masks.tunnel.dl_type ==
+                                          htons(ETH_TYPE_IP)) ?
+                                          match->wc.masks.tunnel.dl_type : 0;
         break;
     case MFF_TUN_FLAGS:
         match_set_tun_flags_masked(match, 0, 0);
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 88f5022..94ddf59 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -877,7 +877,7 @@  ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
 {
     void *nh;
     struct ip_header *ip;
-    struct ovs_16aligned_ip6_hdr *ip6;
+    struct ip6_hdr *ip6;
     void *l4;
     int l3_size;
 
@@ -919,19 +919,19 @@  ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
         ip_dst = get_16aligned_be32(&ip->ip_dst);
 
         tnl->ip_src = ip_src;
-        tnl->ip_dst = ip_dst;
         tnl->ip_tos = ip->ip_tos;
         tnl->ip_ttl = ip->ip_ttl;
+        set_ipv4_dst_tnl(tnl, ip_dst);
 
         *hlen += IP_HEADER_LEN;
 
     } else if (IP_VER(ip->ip_ihl_ver) == 6) {
 
-        memcpy(tnl->ipv6_src.s6_addr, ip6->ip6_src.be16, sizeof ip6->ip6_src);
-        memcpy(tnl->ipv6_dst.s6_addr, ip6->ip6_dst.be16, sizeof ip6->ip6_dst);
+        memcpy(tnl->ipv6_src.s6_addr, ip6->ip6_src.s6_addr,
+               sizeof ip6->ip6_src);
         tnl->ip_tos = 0;
         tnl->ip_ttl = ip6->ip6_hlim;
-
+        set_ipv6_dst_tnl(tnl, *(struct in6_addr *)&ip6->ip6_dst);
         *hlen += IPV6_HEADER_LEN;
 
     } else {
diff --git a/lib/odp-util.c b/lib/odp-util.c
index f16e113..309bb9c 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -510,7 +510,7 @@  format_odp_tnl_push_header(struct ds *ds, struct ovs_action_push_tnl *data)
                       gnh->oam ? "oam," : "",
                       gnh->critical ? "crit," : "",
                       ntohl(get_16aligned_be32(&gnh->vni)) >> 8);
- 
+
         if (gnh->opt_len) {
             ds_put_cstr(ds, ",options(");
             format_geneve_opts(gnh->options, NULL, gnh->opt_len * 4,
@@ -1864,13 +1864,13 @@  odp_tun_key_from_attr__(const struct nlattr *attr,
             tun->ip_src = nl_attr_get_be32(a);
             break;
         case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
-            tun->ip_dst = nl_attr_get_be32(a);
+            set_ipv4_dst_tnl(tun, nl_attr_get_be32(a));
             break;
         case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
             tun->ipv6_src = nl_attr_get_in6_addr(a);
             break;
         case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
-            tun->ipv6_dst = nl_attr_get_in6_addr(a);
+            set_ipv6_dst_tnl(tun, nl_attr_get_in6_addr(a));
             break;
         case OVS_TUNNEL_KEY_ATTR_TOS:
             tun->ip_tos = nl_attr_get_u8(a);
@@ -1961,13 +1961,13 @@  tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key,
     if (tun_key->ip_src) {
         nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, tun_key->ip_src);
     }
-    if (tun_key->ip_dst) {
+    if (tun_key->dl_type == htons(ETH_TYPE_IP)) {
         nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_DST, tun_key->ip_dst);
     }
     if (ipv6_addr_is_set(&tun_key->ipv6_src)) {
         nl_msg_put_in6_addr(a, OVS_TUNNEL_KEY_ATTR_IPV6_SRC, &tun_key->ipv6_src);
     }
-    if (ipv6_addr_is_set(&tun_key->ipv6_dst)) {
+    if (tun_key->dl_type == htons(ETH_TYPE_IPV6)) {
         nl_msg_put_in6_addr(a, OVS_TUNNEL_KEY_ATTR_IPV6_DST, &tun_key->ipv6_dst);
     }
     if (tun_key->ip_tos) {
diff --git a/lib/packets.c b/lib/packets.c
index d82341d..4f00c0e 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -39,7 +39,12 @@  const struct in6_addr in6addr_all_hosts = IN6ADDR_ALL_HOSTS_INIT;
 struct in6_addr
 flow_tnl_dst(const struct flow_tnl *tnl)
 {
-    return tnl->ip_dst ? in6_addr_mapped_ipv4(tnl->ip_dst) : tnl->ipv6_dst;
+    if (tnl->dl_type == htons(ETH_TYPE_IP)) {
+        return in6_addr_mapped_ipv4(tnl->ip_dst);
+    } else if (tnl->dl_type == htons(ETH_TYPE_IPV6)) {
+        return tnl->ipv6_dst;
+    }
+    return in6addr_any;
 }
 
 struct in6_addr
diff --git a/lib/packets.h b/lib/packets.h
index 834e8a4..7e8dd91 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -35,8 +35,26 @@ 
 struct dp_packet;
 struct ds;
 
+#define ETH_TYPE_IP            0x0800
+#define ETH_TYPE_ARP           0x0806
+#define ETH_TYPE_TEB           0x6558
+#define ETH_TYPE_VLAN_8021Q    0x8100
+#define ETH_TYPE_VLAN          ETH_TYPE_VLAN_8021Q
+#define ETH_TYPE_VLAN_8021AD   0x88a8
+#define ETH_TYPE_IPV6          0x86dd
+#define ETH_TYPE_LACP          0x8809
+#define ETH_TYPE_RARP          0x8035
+#define ETH_TYPE_MPLS          0x8847
+#define ETH_TYPE_MPLS_MCAST    0x8848
+
 /* Tunnel information used in flow key and metadata. */
 struct flow_tnl {
+    /* The tunnel destination ip/ipv6 address are not initializing to zero
+     * due to the performance constrains. the ethernet type field 'dl_type' is
+     * used to validate the destination addresses. 'dl_type' is set to zero
+     * for a invalid flow_tnl entry.
+     */
+    ovs_be16 dl_type;
     ovs_be32 ip_dst;
     struct in6_addr ipv6_dst;
     ovs_be32 ip_src;
@@ -49,7 +67,7 @@  struct flow_tnl {
     ovs_be16 tp_dst;
     ovs_be16 gbp_id;
     uint8_t  gbp_flags;
-    uint8_t  pad1[5];        /* Pad to 64 bits. */
+    uint8_t  pad1[1];        /* Pad to 64 bits. */
     struct tun_metadata metadata;
 };
 
@@ -76,10 +94,36 @@  struct flow_tnl {
 
 static inline bool ipv6_addr_is_set(const struct in6_addr *addr);
 
+static inline void
+set_ipv4_dst_tnl(struct flow_tnl *tnl, ovs_be32 ip_dst)
+{
+    if (tnl) {
+        tnl->ip_dst = ip_dst;
+        if (tnl->ip_dst) {
+            tnl->dl_type = htons(ETH_TYPE_IP);
+        } else {
+            tnl->dl_type = 0;
+        }
+    }
+}
+
+static inline void
+set_ipv6_dst_tnl(struct flow_tnl *tnl, struct in6_addr ipv6_dst)
+{
+    if (tnl) {
+        tnl->ipv6_dst = ipv6_dst;
+        if (ipv6_addr_is_set(&tnl->ipv6_dst)) {
+            tnl->dl_type = htons(ETH_TYPE_IPV6);
+        } else {
+            tnl->dl_type = 0;
+        }
+    }
+}
+
 static inline bool
 flow_tnl_dst_is_set(const struct flow_tnl *tnl)
 {
-    return tnl->ip_dst || ipv6_addr_is_set(&tnl->ipv6_dst);
+    return (tnl->dl_type != 0);
 }
 
 struct in6_addr flow_tnl_dst(const struct flow_tnl *tnl);
@@ -90,8 +134,8 @@  static inline size_t
 flow_tnl_size(const struct flow_tnl *src)
 {
     if (!flow_tnl_dst_is_set(src)) {
-        /* Covers ip_dst and ipv6_dst only. */
-        return offsetof(struct flow_tnl, ip_src);
+        /* Covers dl_type field only. */
+        return offsetof(struct flow_tnl, ip_dst);
     }
     if (src->flags & FLOW_TNL_F_UDPIF) {
         /* Datapath format, cover all options we have. */
@@ -154,11 +198,10 @@  static inline void
 pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
 {
     /* It can be expensive to zero out all of the tunnel metadata. However,
-     * we can just zero out ip_dst and the rest of the data will never be
+     * we can just zero out dl_type and the rest of the data will never be
      * looked at. */
     memset(md, 0, offsetof(struct pkt_metadata, in_port));
-    md->tunnel.ip_dst = 0;
-    md->tunnel.ipv6_dst = in6addr_any;
+    md->tunnel.dl_type = 0;
 
     md->in_port.odp_port = port;
 }
@@ -347,18 +390,6 @@  ovs_be32 set_mpls_lse_values(uint8_t ttl, uint8_t tc, uint8_t bos,
 #define ETH_ADDR_SCAN_ARGS(EA) \
     &(EA).ea[0], &(EA).ea[1], &(EA).ea[2], &(EA).ea[3], &(EA).ea[4], &(EA).ea[5]
 
-#define ETH_TYPE_IP            0x0800
-#define ETH_TYPE_ARP           0x0806
-#define ETH_TYPE_TEB           0x6558
-#define ETH_TYPE_VLAN_8021Q    0x8100
-#define ETH_TYPE_VLAN          ETH_TYPE_VLAN_8021Q
-#define ETH_TYPE_VLAN_8021AD   0x88a8
-#define ETH_TYPE_IPV6          0x86dd
-#define ETH_TYPE_LACP          0x8809
-#define ETH_TYPE_RARP          0x8035
-#define ETH_TYPE_MPLS          0x8847
-#define ETH_TYPE_MPLS_MCAST    0x8848
-
 static inline bool eth_type_mpls(ovs_be16 eth_type)
 {
     return eth_type == htons(ETH_TYPE_MPLS) ||
diff --git a/ofproto/ofproto-dpif-ipfix.c b/ofproto/ofproto-dpif-ipfix.c
index a610c53..ca06583 100644
--- a/ofproto/ofproto-dpif-ipfix.c
+++ b/ofproto/ofproto-dpif-ipfix.c
@@ -1721,7 +1721,8 @@  dpif_ipfix_bridge_sample(struct dpif_ipfix *di, const struct dp_packet *packet,
      * of matched packets. */
     packet_delta_count = UINT32_MAX / di->bridge_exporter.probability;
     if (di->bridge_exporter.options->enable_tunnel_sampling) {
-        if (output_odp_port == ODPP_NONE && flow->tunnel.ip_dst) {
+        if (output_odp_port == ODPP_NONE &&
+                flow->tunnel.dl_type == htons(ETH_TYPE_IP)) {
             /* Input tunnel. */
             tunnel_key = &flow->tunnel;
             tunnel_port = dpif_ipfix_find_port(di, input_odp_port);
diff --git a/ofproto/ofproto-dpif-rid.c b/ofproto/ofproto-dpif-rid.c
index d142933..735b5f3 100644
--- a/ofproto/ofproto-dpif-rid.c
+++ b/ofproto/ofproto-dpif-rid.c
@@ -297,8 +297,7 @@  uint32_t
 recirc_alloc_id(struct ofproto_dpif *ofproto)
 {
     struct flow_tnl tunnel;
-    tunnel.ip_dst = htonl(0);
-    tunnel.ipv6_dst = in6addr_any;
+    tunnel.dl_type = 0;
     struct recirc_state state = {
         .table_id = TBL_INTERNAL,
         .ofproto = ofproto,
diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
index f11699c..ea21a1a 100644
--- a/ofproto/ofproto-dpif-sflow.c
+++ b/ofproto/ofproto-dpif-sflow.c
@@ -899,7 +899,7 @@  sflow_read_tnl_push_action(const struct nlattr *attr,
     /* IPv4 */
     /* Cannot assume alignment so just use memcpy. */
     sflow_actions->tunnel.ip_src = get_16aligned_be32(&ip->ip_src);
-    sflow_actions->tunnel.ip_dst = get_16aligned_be32(&ip->ip_dst);
+    set_ipv4_dst_tnl(&sflow_actions->tunnel, get_16aligned_be32(&ip->ip_dst));
     sflow_actions->tunnel.ip_tos = ip->ip_tos;
     sflow_actions->tunnel.ip_ttl = ip->ip_ttl;
     /* The tnl_push action can supply the ip_protocol too. */
@@ -991,7 +991,7 @@  sflow_read_set_action(const struct nlattr *attr,
                 sflow_actions->tunnel.ip_src = key->ipv4_src;
             }
             if (key->ipv4_dst) {
-                sflow_actions->tunnel.ip_dst = key->ipv4_dst;
+                set_ipv4_dst_tnl(&sflow_actions->tunnel, key->ipv4_dst);
             }
             if (key->ipv4_proto) {
                 sflow_actions->tunnel_ipproto = key->ipv4_proto;
@@ -1287,7 +1287,7 @@  dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
     fs.output = cookie->sflow.output;
 
     /* Input tunnel. */
-    if (flow->tunnel.ip_dst) {
+    if (flow->tunnel.dl_type == htons(ETH_TYPE_IP)) {
 	memset(&tnlInElem, 0, sizeof(tnlInElem));
 	tnlInElem.tag = SFLFLOW_EX_IPV4_TUNNEL_INGRESS;
 	tnlInProto = dpif_sflow_tunnel_proto(in_dsp->tunnel_type);
diff --git a/ofproto/tunnel.c b/ofproto/tunnel.c
index 5cf6c75..b2bd254 100644
--- a/ofproto/tunnel.c
+++ b/ofproto/tunnel.c
@@ -362,12 +362,12 @@  tnl_wc_init(struct flow *flow, struct flow_wildcards *wc)
 {
     if (tnl_port_should_receive(flow)) {
         wc->masks.tunnel.tun_id = OVS_BE64_MAX;
-        if (flow->tunnel.ip_dst) {
+        if (flow->tunnel.dl_type == htons(ETH_TYPE_IP)) {
             wc->masks.tunnel.ip_src = OVS_BE32_MAX;
-            wc->masks.tunnel.ip_dst = OVS_BE32_MAX;
-        } else {
+            set_ipv4_dst_tnl(&wc->masks.tunnel, OVS_BE32_MAX);
+        } else if (flow->tunnel.dl_type == htons(ETH_TYPE_IPV6)) {
             wc->masks.tunnel.ipv6_src = in6addr_exact;
-            wc->masks.tunnel.ipv6_dst = in6addr_exact;
+            set_ipv6_dst_tnl(&wc->masks.tunnel, in6addr_exact);
         }
         wc->masks.tunnel.flags = (FLOW_TNL_F_DONT_FRAGMENT |
                                   FLOW_TNL_F_CSUM |
@@ -424,7 +424,9 @@  tnl_port_send(const struct ofport_dpif *ofport, struct flow *flow,
     if (!cfg->ip_dst_flow) {
         flow->tunnel.ip_dst = in6_addr_get_mapped_ipv4(&tnl_port->match.ipv6_dst);
         if (!flow->tunnel.ip_dst) {
-            flow->tunnel.ipv6_dst = tnl_port->match.ipv6_dst;
+            set_ipv6_dst_tnl(&flow->tunnel, tnl_port->match.ipv6_dst);
+        } else {
+            set_ipv4_dst_tnl(&flow->tunnel, flow->tunnel.ip_dst);
         }
     }
     flow->pkt_mark = tnl_port->match.pkt_mark;