diff mbox

[ovs-dev,v11,4/5] userspace: extend layer 3 support to cover non-IP packets

Message ID 1464848661-7562-5-git-send-email-simon.horman@netronome.com
State Changes Requested
Headers show

Commit Message

Simon Horman June 2, 2016, 6:24 a.m. UTC
Extend support for layer 3 packets to cover non-IP packets.

This removes the assumption that the first octet of a layer 3 packet
indicates the IP protocol version - true for IP (v4 and v6), but not
necessarily for other protocols.

The key motivation for this is to allow forwarding of MPLS packets, which
are technically layer 2.5 rather than layer 3; the distinction seems
unimportant here.

This relies on datapaths setting OVS_KEY_ATTR_PACKET_ETHERTYPE to
the ethernet type corresponding to the protocol of layer 3 packets
on a flow miss.

Signed-off-by: Simon Horman <simon.horman@netronome.com>
Acked-by: Ben Pfaff <blp@ovn.org>

---
v11
* Added Ack from Ben Pfaff

v10
* Delete now bogus comment about protocol assumptions for L3 packets

v9
* New patch
---
 include/openvswitch/ofp-print.h |  7 +++++-
 lib/dp-packet.h                 |  2 +-
 lib/dpif-netdev.c               |  5 +---
 lib/dpif.c                      |  9 ++-----
 lib/flow.c                      | 40 +++++++++----------------------
 lib/odp-util.c                  | 53 ++++++++++++++++++++++++++++++-----------
 lib/ofp-print.c                 | 24 +++++++++++++++----
 lib/packets.c                   | 14 ++++++++---
 8 files changed, 90 insertions(+), 64 deletions(-)
diff mbox

Patch

diff --git a/include/openvswitch/ofp-print.h b/include/openvswitch/ofp-print.h
index 3e951173acc2..dce80a7cbc88 100644
--- a/include/openvswitch/ofp-print.h
+++ b/include/openvswitch/ofp-print.h
@@ -23,6 +23,8 @@ 
 #include <stdio.h>
 #include <stdbool.h>
 
+#include <openvswitch/types.h>
+
 struct ds;
 struct ofp10_match;
 struct ofp_flow_mod;
@@ -30,6 +32,7 @@  struct ofp_header;
 struct ofputil_flow_stats;
 struct ofputil_table_features;
 struct ofputil_table_stats;
+struct dp_packet;
 
 #ifdef  __cplusplus
 extern "C" {
@@ -42,7 +45,9 @@  void ofp10_match_print(struct ds *, const struct ofp10_match *, int verbosity);
 
 char *ofp_to_string(const void *, size_t, int verbosity);
 char *ofp10_match_to_string(const struct ofp10_match *, int verbosity);
-char *ofp_packet_to_string(const void *data, size_t len, bool is_layer3);
+char *ofp_packet_to_string(const void *data, size_t len,
+			   ovs_be16 packet_ethertype);
+char *ofp_dp_packet_to_string(const struct dp_packet *);
 
 void ofp_print_flow_stats(struct ds *, struct ofputil_flow_stats *);
 void ofp_print_version(const struct ofp_header *, struct ds *);
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 6b3ecd974e48..ca788640597f 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -258,7 +258,7 @@  dp_packet_equal(const struct dp_packet *a, const struct dp_packet *b)
 static inline bool
 dp_packet_is_l3(const struct dp_packet *b)
 {
-    return b->l3_ofs == 0;
+    return b->l3_ofs == 0 || b->l2_5_ofs == 0;
 }
 
 /* Get the start of the Ethernet frame.  Return NULL if 'b' is an l3 packet
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 6d79febfc8ed..4cf10dc28876 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3501,10 +3501,7 @@  dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet_,
 
         ofpbuf_init(&key, 0);
         odp_flow_key_from_flow(&odp_parms, &key);
-        packet_str = ofp_packet_to_string(dp_packet_data(packet_),
-                                          dp_packet_size(packet_),
-                                          dp_packet_is_l3(packet_));
-
+        packet_str = ofp_dp_packet_to_string(packet_);
         odp_flow_key_format(key.data, key.size, &ds);
 
         VLOG_DBG("%s: %s upcall:\n%s\n%s", dp->name,
diff --git a/lib/dpif.c b/lib/dpif.c
index cb9519e2de5c..808109bd4140 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -1390,10 +1390,7 @@  dpif_print_packet(struct dpif *dpif, struct dpif_upcall *upcall)
         struct ds flow;
         char *packet;
 
-        packet = ofp_packet_to_string(dp_packet_data(&upcall->packet),
-                                      dp_packet_size(&upcall->packet),
-                                      dp_packet_is_l3(&upcall->packet));
-
+        packet = ofp_dp_packet_to_string(&upcall->packet);
         ds_init(&flow);
         odp_flow_key_format(upcall->key, upcall->key_len, &flow);
 
@@ -1686,9 +1683,7 @@  log_execute_message(struct dpif *dpif, const struct dpif_execute *execute,
         struct ds ds = DS_EMPTY_INITIALIZER;
         char *packet;
 
-        packet = ofp_packet_to_string(dp_packet_data(execute->packet),
-                                      dp_packet_size(execute->packet),
-                                      dp_packet_is_l3(execute->packet));
+        packet = ofp_dp_packet_to_string(execute->packet);
         ds_put_format(&ds, "%s: %sexecute ",
                       dpif_name(dpif),
                       (subexecute ? "sub-"
diff --git a/lib/flow.c b/lib/flow.c
index 888e3b3f6a5a..abe7c220e5fd 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -439,23 +439,6 @@  invalid:
     arp_buf[1] = eth_addr_zero;
 }
 
-/* Determines IP version if a layer 3 packet */
-static ovs_be16
-get_l3_eth_type(struct dp_packet *packet)
-{
-    struct ip_header *ip = dp_packet_l3(packet);
-    int ip_ver = IP_VER(ip->ip_ihl_ver);
-
-    switch (ip_ver) {
-    case 4:
-        return htons(ETH_TYPE_IP);
-    case 6:
-        return htons(ETH_TYPE_IPV6);
-    default:
-        return 0;
-    }
-}
-
 /* Initializes 'flow' members from 'packet' and 'md'.
  * Expects packet->l3_ofs to be set to 0 for layer 3 packets.
  *
@@ -572,29 +555,28 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
             miniflow_push_be16(mf, dl_type, dl_type);
             miniflow_push_be16(mf, vlan_tci, vlan_tci);
         }
-
-        /* Parse mpls. */
-        if (OVS_UNLIKELY(eth_type_mpls(dl_type))) {
-            int count;
-            const void *mpls = data;
-
-            packet->l2_5_ofs = (char *)data - frame;
-            count = parse_mpls(&data, &size);
-            miniflow_push_words_32(mf, mpls_lse, mpls, count);
-        }
     } else {
-        /* We assume L3 packets are either IPv4 or IPv6. */
         packet->l3_ofs = 0;
         miniflow_pad_from_64(mf, base_layer);
         miniflow_push_uint8(mf, base_layer, LAYER_3);
         miniflow_pad_to_64(mf, base_layer);
 
-        dl_type = get_l3_eth_type(packet);
+        dl_type = packet->md.packet_ethertype;
         miniflow_pad_from_64(mf, dl_type);
         miniflow_push_be16(mf, dl_type, dl_type);
         miniflow_push_be16(mf, vlan_tci, 0);
     }
 
+    /* Parse mpls. */
+    if (OVS_UNLIKELY(eth_type_mpls(dl_type))) {
+        int count;
+        const void *mpls = data;
+
+        packet->l2_5_ofs = (char *)data - frame;
+        count = parse_mpls(&data, &size);
+        miniflow_push_words_32(mf, mpls_lse, mpls, count);
+    }
+
     /* Network layer. */
     packet->l3_ofs = (char *)data - frame;
 
diff --git a/lib/odp-util.c b/lib/odp-util.c
index 2af401efb489..f8c1c24468cd 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -4359,6 +4359,8 @@  odp_flow_key_from_flow__(const struct odp_flow_key_parms *parms,
         }
 
         nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, data->dl_type);
+    } else {
+        nl_msg_put_be16(buf, OVS_KEY_ATTR_PACKET_ETHERTYPE, data->dl_type);
     }
 
     if (flow->dl_type == htons(ETH_TYPE_IP)) {
@@ -4605,12 +4607,13 @@  odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len,
             md->base_layer = LAYER_2;
             wanted_attrs &= ~(1u << OVS_KEY_ATTR_ETHERNET);
             break;
+        case OVS_KEY_ATTR_PACKET_ETHERTYPE:
+            md->packet_ethertype = nl_attr_get_be16(nla);
+            break;
         case OVS_KEY_ATTR_IPV4:
-            md->packet_ethertype = htons(ETH_TYPE_IP);
             wanted_attrs &= ~(1u << OVS_KEY_ATTR_IPV4);
             break;
         case OVS_KEY_ATTR_IPV6:
-            md->packet_ethertype = htons(ETH_TYPE_IPV6);
             wanted_attrs &= ~(1u << OVS_KEY_ATTR_IPV6);
             break;
         default:
@@ -4763,6 +4766,29 @@  check_expectations(uint64_t present_attrs, int out_of_range_attr,
 }
 
 static bool
+parse_ethertype__(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1],
+                  uint64_t *expected_attrs, struct flow *flow,
+                  const struct flow *src_flow, unsigned attr_idx, bool is_mask)
+{
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+
+    flow->dl_type = nl_attr_get_be16(attrs[attr_idx]);
+
+    if (!is_mask && ntohs(flow->dl_type) < ETH_TYPE_MIN) {
+        VLOG_ERR_RL(&rl, "invalid Ethertype %"PRIu16" in flow key",
+                    ntohs(flow->dl_type));
+        return false;
+    }
+    if (is_mask && (!src_flow || ntohs(src_flow->dl_type) < ETH_TYPE_MIN) &&
+        flow->dl_type != htons(0xffff)) {
+        return false;
+    }
+    *expected_attrs |= UINT64_C(1) << attr_idx;
+
+    return true;
+}
+
+static bool
 parse_ethertype(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1],
                 uint64_t present_attrs, uint64_t *expected_attrs,
                 struct flow *flow, const struct flow *src_flow)
@@ -4771,17 +4797,11 @@  parse_ethertype(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1],
     bool is_mask = flow != src_flow;
 
     if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_ETHERTYPE)) {
-        flow->dl_type = nl_attr_get_be16(attrs[OVS_KEY_ATTR_ETHERTYPE]);
-        if (!is_mask && ntohs(flow->dl_type) < ETH_TYPE_MIN) {
-            VLOG_ERR_RL(&rl, "invalid Ethertype %"PRIu16" in flow key",
-                        ntohs(flow->dl_type));
-            return false;
-        }
-        if (is_mask && ntohs(src_flow->dl_type) < ETH_TYPE_MIN &&
-            flow->dl_type != htons(0xffff)) {
-            return false;
-        }
-        *expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERTYPE;
+        return parse_ethertype__(attrs, expected_attrs, flow, src_flow,
+                                 OVS_KEY_ATTR_ETHERTYPE, is_mask);
+    } else if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_PACKET_ETHERTYPE)) {
+        return parse_ethertype__(attrs, expected_attrs, flow, src_flow,
+                                 OVS_KEY_ATTR_PACKET_ETHERTYPE, is_mask);
     } else {
         if (!is_mask) {
             if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_IPV4)) {
@@ -4791,6 +4811,8 @@  parse_ethertype(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1],
             } else {
                 flow->dl_type = htons(FLOW_DL_TYPE_NONE);
             }
+        } else if (src_flow->base_layer == LAYER_3) {
+            flow->dl_type = htons(0xffff);
         } else if (ntohs(src_flow->dl_type) < ETH_TYPE_MIN) {
             /* See comments in odp_flow_key_from_flow__(). */
             VLOG_ERR_RL(&rl, "mask expected for non-Ethernet II frame");
@@ -5213,7 +5235,10 @@  odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len,
         put_ethernet_key(eth_key, flow);
         flow->base_layer = LAYER_2;
         expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERNET;
-    } else {
+    } else if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_PACKET_ETHERTYPE)) {
+        flow->base_layer = LAYER_3;
+        expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_PACKET_ETHERTYPE;
+    } else if (is_mask && src_flow->base_layer == LAYER_3) {
         flow->base_layer = LAYER_3;
     }
 
diff --git a/lib/ofp-print.c b/lib/ofp-print.c
index 8c010db1a9e7..1a1c920c6293 100644
--- a/lib/ofp-print.c
+++ b/lib/ofp-print.c
@@ -58,7 +58,7 @@  static void ofp_print_error(struct ds *, enum ofperr);
 /* Returns a string that represents the contents of the packet in the
  * 'len' bytes starting at 'data'.  The caller must free the returned string.*/
 char *
-ofp_packet_to_string(const void *data, size_t len, bool is_layer3)
+ofp_packet_to_string(const void *data, size_t len, ovs_be16 packet_ethertype)
 {
     struct ds ds = DS_EMPTY_INITIALIZER;
     struct dp_packet buf;
@@ -66,7 +66,9 @@  ofp_packet_to_string(const void *data, size_t len, bool is_layer3)
     size_t l4_size;
 
     dp_packet_use_const(&buf, data, len);
-    if (is_layer3) {
+    if (packet_ethertype) {
+        /* This is a layer 3 packet */
+        buf.md.packet_ethertype = packet_ethertype;
         buf.l3_ofs = 0;
     }
     flow_extract(&buf, &flow);
@@ -99,6 +101,17 @@  ofp_packet_to_string(const void *data, size_t len, bool is_layer3)
     return ds_cstr(&ds);
 }
 
+/* Returns a string that represents the contents of dp_packet 'p'.  The caller
+ * must free the returned string. */
+char *
+ofp_dp_packet_to_string(const struct dp_packet *p)
+{
+    ovs_assert(!dp_packet_is_l3(p) || ntohs(p->md.packet_ethertype));
+    return ofp_packet_to_string(dp_packet_data(p), dp_packet_size(p),
+                                dp_packet_is_l3(p) ? p->md.packet_ethertype
+                                : htons(0));
+}
+
 static void
 format_hex_arg(struct ds *s, const uint8_t *data, size_t len)
 {
@@ -203,7 +216,7 @@  ofp_print_packet_in(struct ds *string, const struct ofp_header *oh,
 
     if (verbosity > 0) {
         char *packet = ofp_packet_to_string(public->packet,
-                                            public->packet_len, false);
+                                            public->packet_len, htons(0));
         ds_put_cstr(string, packet);
         free(packet);
     }
@@ -239,7 +252,8 @@  ofp_print_packet_out(struct ds *string, const struct ofp_header *oh,
     if (po.buffer_id == UINT32_MAX) {
         ds_put_format(string, " data_len=%"PRIuSIZE, po.packet_len);
         if (verbosity > 0 && po.packet_len > 0) {
-            char *packet = ofp_packet_to_string(po.packet, po.packet_len, false);
+            char *packet = ofp_packet_to_string(po.packet, po.packet_len,
+                                                htons(0));
             ds_put_char(string, '\n');
             ds_put_cstr(string, packet);
             free(packet);
@@ -3583,5 +3597,5 @@  ofp_print(FILE *stream, const void *oh, size_t len, int verbosity)
 void
 ofp_print_packet(FILE *stream, const void *data, size_t len)
 {
-    print_and_free(stream, ofp_packet_to_string(data, len, false));
+    print_and_free(stream, ofp_packet_to_string(data, len, htons(0)));
 }
diff --git a/lib/packets.c b/lib/packets.c
index baa59feaa5ef..b5ccba2077d3 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -232,16 +232,24 @@  push_eth(struct dp_packet *packet, const struct eth_addr *dst,
     eh->eth_src = *src;
 }
 
-/* Removes Ethernet header, including all VLAN and MPLS headers, from 'packet'.
+/* Removes Ethernet header, including VLAN header, from 'packet'.
  *
  * Previous to calling this function, 'ofpbuf_l3(packet)' must not be NULL */
 void
 pop_eth(struct dp_packet *packet)
 {
+    char *l2_5 = dp_packet_l2_5(packet);;
+    int increment;
+
     ovs_assert(dp_packet_l3(packet) != NULL);
 
-    dp_packet_resize_l2_5(packet, -packet->l3_ofs);
-    dp_packet_set_l2_5(packet, NULL);
+    if (l2_5) {
+        increment = packet->l2_5_ofs;
+    } else {
+        increment = packet->l3_ofs;
+    }
+
+    dp_packet_resize_l2(packet, -increment);
 }
 
 /* Set ethertype of the packet. */