diff mbox series

[ovs-dev,RFC,1/2] dpif-netdev: Refactor the miniflow extraction logic using PTYPEs.

Message ID 1515779133-60826-1-git-send-email-bhanuprakash.bodireddy@intel.com
State Accepted
Delegated to: Ian Stokes
Headers show
Series [ovs-dev,RFC,1/2] dpif-netdev: Refactor the miniflow extraction logic using PTYPEs. | expand

Commit Message

Bodireddy, Bhanuprakash Jan. 12, 2018, 5:45 p.m. UTC
This commit refactors the miniflow extraction logic based on PTYPEs.

If the NIC supports PTYPEs, the packet_type field is populated and
the PTYPE value can be used to check whether the packet is a tunnel packet.
For a tunnel packet, the tunnel information is populated into the
packet metadata and miniflow extraction is then performed. This is used
by a future commit that skips recirculation on VXLAN decapsulation.

If PTYPEs aren't supported by a NIC, this patch doesn't do anything.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com>
---
 lib/dp-packet.h   | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 lib/dpif-netdev.c | 29 ++++++++++++++++++++++++--
 lib/flow.c        | 38 ++++++++++++++++++++++++++++++++++
 lib/flow.h        |  1 +
 4 files changed, 127 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index b4b721c..9ff42d7 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -80,6 +80,11 @@  struct dp_packet {
     };
 };
 
+struct ptype_offsets {
+    uint16_t l3_ofs;   /* L3 offset; dp_packet_is_tunnel() sets ETH_HDR_LEN. */
+    uint16_t l4_ofs;   /* L4 offset; dp_packet_is_tunnel() sets L2 + L3 len. */
+};
+
 static inline void *dp_packet_data(const struct dp_packet *);
 static inline void dp_packet_set_data(struct dp_packet *, void *);
 static inline void *dp_packet_base(const struct dp_packet *);
@@ -567,10 +572,16 @@  dp_packet_set_data(struct dp_packet *b, void *data)
 }
 
 static inline void
-dp_packet_reset_packet(struct dp_packet *b, int off)
+__dp_packet_reset_packet(struct dp_packet *b, int off)
 {
     dp_packet_set_size(b, dp_packet_size(b) - off);
     dp_packet_set_data(b, ((unsigned char *) dp_packet_data(b) + off));
+}
+
+static inline void
+dp_packet_reset_packet(struct dp_packet *b, int off)
+{
+    __dp_packet_reset_packet(b, off);
     dp_packet_reset_offsets(b);
 }
 
@@ -691,6 +702,55 @@  reset_dp_packet_checksum_ol_flags(struct dp_packet *p)
 #define reset_dp_packet_checksum_ol_flags(arg)
 #endif
 
+static inline bool
+dp_packet_is_encapsulated(struct dp_packet *p OVS_UNUSED)
+{
+#ifdef DPDK_NETDEV
+    return RTE_ETH_IS_TUNNEL_PKT(p->mbuf.packet_type);
+#else /* Without DPDK_NETDEV the mbuf ptype is not consulted. */
+    return false;
+#endif /* DPDK_NETDEV */
+}
+
+/* Returns true if the mbuf ptype of 'p' marks it as UDP and its UDP dst port
+ * is VXLAN_DST_PORT; false otherwise, and always false without DPDK_NETDEV.
+ * If 'pt_ofs' is nonnull it is filled in for any packet with a UDP ptype.
+ * NOTE(review): header lengths are fixed (no VLAN tag/IP options) -- confirm.
+ *
+ * To use ptypes, 82599ES (Niantic) needs vectorization disabled
+ * (CONFIG_RTE_IXGBE_INC_VECTOR=n in config/common_base) and XL710/X710 (FVL)
+ * needs firmware version >= 5.04 for correct ptype identification. */
+static inline bool
+dp_packet_is_tunnel(struct dp_packet *p OVS_UNUSED,
+                    struct ptype_offsets *pt_ofs OVS_UNUSED)
+{
+#ifdef DPDK_NETDEV
+#define VXLAN_DST_PORT 4789
+#define ETH_HDR_LEN 14
+    uint32_t ptype = p->mbuf.packet_type;
+
+    /* XXX: Use dp_packet_is_encapsulated(p) once ptypes
+     * are fixed in the future. */
+    if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP) {
+        uint32_t hdr_len = (ptype & RTE_PTYPE_L3_IPV4) ?
+                           ETH_HDR_LEN + IP_HEADER_LEN :
+                           ETH_HDR_LEN + IPV6_HEADER_LEN;
+
+        if (pt_ofs) {
+            pt_ofs->l3_ofs = ETH_HDR_LEN;
+            pt_ofs->l4_ofs = hdr_len;
+        }
+        struct udp_header *udp_hdr = dp_packet_at(p, hdr_len, UDP_HEADER_LEN);
+        if (udp_hdr && udp_hdr->udp_dst == htons(VXLAN_DST_PORT)) {
+            return true;
+        }
+    }
+    return false;
+#else
+    return false;
+#endif
+}
+
 enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */
 
 struct dp_packet_batch {
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index ef8ebf2..a34a1c5 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -4987,6 +4987,30 @@  dp_netdev_queue_batches(struct dp_packet *pkt,
     packet_batch_per_flow_update(batch, pkt, mf);
 }
 
+static void
+packet_mf_extract(struct dp_packet *pkt, struct netdev_flow_key *key)
+{
+    struct ptype_offsets pt_ofs;
+    if (dp_packet_is_tunnel(pkt, &pt_ofs)) {
+       /* tunnel_flow_extract() pops the VXLAN header before it runs
+        * miniflow extraction, and the pop needs valid pkt->l3_ofs and
+        * pkt->l4_ofs. Those offsets are normally set as part of miniflow
+        * extraction, which means dp_packet_l3() and dp_packet_l4() only
+        * work once extraction has been invoked.
+        *
+        * Because the pop happens before extraction in this path, seed the
+        * dp_packet offsets from the ptype-derived values in 'pt_ofs'.
+        */
+        pkt->l3_ofs = pt_ofs.l3_ofs;
+        pkt->l4_ofs = pt_ofs.l4_ofs;
+        tunnel_flow_extract(pkt, &key->mf);
+    } else {
+        miniflow_extract(pkt, &key->mf);
+    }
+
+    key->len = 0; /* Not computed yet. */
+}
+
 /* Try to process all ('cnt') the 'packets' using only the exact match cache
  * 'pmd->flow_cache'. If a flow is not found for a packet 'packets[i]', the
  * miniflow is copied into 'keys' and the packet pointer is moved at the
@@ -5037,8 +5061,9 @@  emc_processing(struct dp_netdev_pmd_thread *pmd,
         if (!md_is_valid) {
             pkt_metadata_init(&packet->md, port_no);
         }
-        miniflow_extract(packet, &key->mf);
-        key->len = 0; /* Not computed yet. */
+
+        packet_mf_extract(packet, key);
+
         /* If EMC is disabled skip hash computation and emc_lookup */
         if (cur_min) {
             if (!md_is_valid) {
diff --git a/lib/flow.c b/lib/flow.c
index f9d7c2a..badf7e7 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -32,6 +32,7 @@ 
 #include "openvswitch/dynamic-string.h"
 #include "hash.h"
 #include "jhash.h"
+#include "netdev-native-tnl.h"
 #include "openvswitch/match.h"
 #include "dp-packet.h"
 #include "openflow/openflow.h"
@@ -971,6 +972,43 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
     dst->map = mf.map;
 }
 
+void
+tunnel_flow_extract(struct dp_packet *packet, struct miniflow *dst)
+{
+    /* netdev_vxlan_pop_header() resets all of the tunnel metadata, so
+     * back up the in_port and the packet offsets before invoking it. */
+    odp_port_t odp_port = packet->md.in_port.odp_port;
+    uint16_t l3_ofs = packet->l3_ofs;
+    uint16_t l4_ofs = packet->l4_ofs;
+
+    /* This populates the flow_tnl structure and changes the offsets of the
+     * packet. NOTE(review): the call's result is not checked -- confirm. */
+    netdev_vxlan_pop_header(packet);
+
+    packet->md.in_port.odp_port = odp_port;
+
+    /* Now that the tunnel info is populated in the packet metadata, run
+     * the miniflow extraction on the packet. */
+    miniflow_extract(packet, dst);
+
+    /* Undo the pop: move the data pointer back and grow the size again. */
+    __dp_packet_reset_packet(packet,
+                    -(l4_ofs +
+                      sizeof(struct udp_header) +
+                      sizeof(struct vxlanhdr)));
+
+    /* The pop goes through this call chain:
+     *   netdev_vxlan_pop_header()
+     *     dp_packet_reset_packet()
+     *       dp_packet_reset_offsets()
+     *         packet->l3_ofs = UINT16_MAX
+     *         packet->l4_ofs = UINT16_MAX
+     * so the offsets saved on entry have to be restored here.
+     */
+    packet->l3_ofs = l3_ofs;
+    packet->l4_ofs = l4_ofs;
+}
+
 ovs_be16
 parse_dl_type(const struct eth_header *data_, size_t size)
 {
diff --git a/lib/flow.h b/lib/flow.h
index eb1e2bf..395a6c8 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -533,6 +533,7 @@  struct pkt_metadata;
  * 'dst->map' is ignored on input and set on output to indicate which fields
  * were extracted. */
 void miniflow_extract(struct dp_packet *packet, struct miniflow *dst);
+void tunnel_flow_extract(struct dp_packet *packet, struct miniflow *);
 void miniflow_map_init(struct miniflow *, const struct flow *);
 void flow_wc_map(const struct flow *, struct flowmap *);
 size_t miniflow_alloc(struct miniflow *dsts[], size_t n,