diff mbox series

[ovs-dev,v4,8/9] odp-execute: Add ISA implementation of pop_vlan action.

Message ID 20220105165349.3447695-9-emma.finn@intel.com
State Superseded
Headers show
Series Actions Infrastructure + Optimizations | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test fail github build: failed

Commit Message

Finn, Emma Jan. 5, 2022, 4:53 p.m. UTC
This commit adds the AVX512 implementation of the pop_vlan action.
The implementation here is auto-validated by the miniflow
extract autovalidator, hence its correctness can be easily
tested and verified.

Signed-off-by: Emma Finn <emma.finn@intel.com>
---
 NEWS                      |  1 +
 lib/odp-execute-avx512.c  | 77 ++++++++++++++++++++++++++++++++++++++-
 lib/odp-execute-private.c |  2 +-
 lib/odp-execute-private.h |  2 +-
 4 files changed, 79 insertions(+), 3 deletions(-)

Comments

Van Haaren, Harry Jan. 6, 2022, 1:11 p.m. UTC | #1
> -----Original Message-----
> From: Finn, Emma <emma.finn@intel.com>
> Sent: Wednesday, January 5, 2022 4:54 PM
> To: dev@openvswitch.org; Van Haaren, Harry <harry.van.haaren@intel.com>;
> Amber, Kumar <kumar.amber@intel.com>
> Cc: Finn, Emma <emma.finn@intel.com>
> Subject: [PATCH v4 8/9] odp-execute: Add ISA implementation of pop_vlan
> action.
> 
> This commit adds the AVX512 implementation of the pop_vlan action.
> The implementation here is auto-validated by the miniflow
> extract autovalidator, hence its correctness can be easily
> tested and verified.
> 
> Signed-off-by: Emma Finn <emma.finn@intel.com>

Some comments below for variable renaming & clarity.


> diff --git a/NEWS b/NEWS
> index f13722ab7..f5032bdd0 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -21,6 +21,7 @@ Post-v2.16.0
>       * Add build time configure command to enable auto-validator as default
>         actions implementation at build time.
>       * Add AVX512 implementation of actions.
> +     * Add support for an AVX512 optimized version of pop_vlan action.

I'm not sure how verbose we like NEWS files, I think the "pop vlan" action is a bit
too detailed, the above "add avx512 implementation of actions".

<snip>

> +static inline void ALWAYS_INLINE
> +avx512_dp_packet_resize_l2(struct dp_packet *b, int increment)
> +{

"increment" here is not a great variable name, as we reduce the size by increment.
Really, it is the size to change the L2 that this parameter communicates. Rename
to "size" for a generic description, or perhaps "resize_by_bytes" for more detail?

This is an int variable as negative increments are used to "pop" data out of the packet.

This function gets reworked to be more generic in the last  patch, so I'll review more
in detail there.

<snip remainder of resize l2 func>


> +static inline void ALWAYS_INLINE
> +avx512_eth_pop_vlan(struct dp_packet *packet)
> +{
> +    struct vlan_eth_header *veh = dp_packet_eth(packet);
> +
> +    if (veh && dp_packet_size(packet) >= sizeof *veh &&
> +        eth_type_vlan(veh->veth_type)) {

Verified that the checks here are the same as those on scalar path.

<snip>
diff mbox series

Patch

diff --git a/NEWS b/NEWS
index f13722ab7..f5032bdd0 100644
--- a/NEWS
+++ b/NEWS
@@ -21,6 +21,7 @@  Post-v2.16.0
      * Add build time configure command to enable auto-validator as default
        actions implementation at build time.
      * Add AVX512 implementation of actions.
+     * Add support for an AVX512 optimized version of pop_vlan action.
    - Python:
      * For SSL support, the use of the pyOpenSSL library has been replaced
        with the native 'ssl' module.
diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c
index aa71faa1c..8bbfd5203 100644
--- a/lib/odp-execute-avx512.c
+++ b/lib/odp-execute-avx512.c
@@ -14,6 +14,11 @@ 
  * limitations under the License.
  */
 
+#ifdef __x86_64__
+/* Sparse cannot handle the AVX512 instructions. */
+#if !defined(__CHECKER__)
+
+
 #include <config.h>
 #include <errno.h>
 
@@ -25,6 +30,71 @@ 
 
 #include "immintrin.h"
 
+VLOG_DEFINE_THIS_MODULE(odp_execute_avx512);
+BUILD_ASSERT_DECL(offsetof(struct dp_packet, l2_5_ofs) +
+                  MEMBER_SIZEOF(struct dp_packet, l2_5_ofs) ==
+                  offsetof(struct dp_packet, l3_ofs));
+
+BUILD_ASSERT_DECL(offsetof(struct dp_packet, l3_ofs) +
+                           MEMBER_SIZEOF(struct dp_packet, l3_ofs) ==
+                           offsetof(struct dp_packet, l4_ofs));
+
+static inline void ALWAYS_INLINE
+avx512_dp_packet_resize_l2(struct dp_packet *b, int increment)
+{
+    /* update packet size/data pointers */
+    dp_packet_set_data(b, (char *) dp_packet_data(b) - increment);
+    dp_packet_set_size(b, dp_packet_size(b) + increment);
+
+    /* Increment u16 packet offset values */
+    const __m128i v_zeros = _mm_setzero_si128();
+    const __m128i v_u16_max = _mm_cmpeq_epi16(v_zeros, v_zeros);
+
+    /* Only these lanes can be incremented for push-VLAN action. */
+    const uint8_t k_lanes = 0b1110;
+    __m128i v_offset = _mm_set1_epi16(VLAN_HEADER_LEN);
+
+    /* Load packet and compare with UINT16_MAX */
+    void *adjust_ptr = &b->l2_pad_size;
+    __m128i v_adjust_src = _mm_loadu_si128(adjust_ptr);
+    __mmask8 k_cmp = _mm_mask_cmpneq_epu16_mask(k_lanes, v_adjust_src,
+                                                    v_u16_max);
+
+    /* Add VLAN_HEADER_LEN using compare mask, store results. */
+    __m128i v_adjust_wip = _mm_mask_sub_epi16(v_adjust_src, k_cmp,
+                                              v_adjust_src, v_offset);
+    _mm_storeu_si128(adjust_ptr, v_adjust_wip);
+
+}
+
+static inline void ALWAYS_INLINE
+avx512_eth_pop_vlan(struct dp_packet *packet)
+{
+    struct vlan_eth_header *veh = dp_packet_eth(packet);
+
+    if (veh && dp_packet_size(packet) >= sizeof *veh &&
+        eth_type_vlan(veh->veth_type)) {
+
+        __m128i v_ether = _mm_loadu_si128((void *) veh);
+        __m128i v_realign = _mm_alignr_epi8(v_ether, _mm_setzero_si128(),
+                                            16 - VLAN_HEADER_LEN);
+        _mm_storeu_si128((void *) veh, v_realign);
+        avx512_dp_packet_resize_l2(packet, -VLAN_HEADER_LEN);
+
+    }
+}
+
+static void
+action_avx512_pop_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
+                       const struct nlattr *a OVS_UNUSED,
+                       bool should_steal OVS_UNUSED)
+{
+    struct dp_packet *packet;
+
+    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+        avx512_eth_pop_vlan(packet);
+    }
+}
 
 /* Probe functions to check ISA requirements. */
 static int32_t
@@ -62,8 +132,13 @@  action_avx512_probe(void)
 
 
 int32_t
-action_avx512_init(void)
+action_avx512_init(struct odp_execute_action_impl *self)
 {
     avx512_isa_probe(0);
+    self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_avx512_pop_vlan;
+
     return 0;
 }
+
+#endif
+#endif
diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c
index 19a04f518..7c58d90d2 100644
--- a/lib/odp-execute-private.c
+++ b/lib/odp-execute-private.c
@@ -52,7 +52,7 @@  static struct odp_execute_action_impl action_impls[] = {
         .available = 1,
         .name = "avx512",
         .probe = action_avx512_probe,
-        .init_func = NULL,
+        .init_func = action_avx512_init,
     },
     #endif
 };
diff --git a/lib/odp-execute-private.h b/lib/odp-execute-private.h
index 4c09bee63..5ba2868bf 100644
--- a/lib/odp-execute-private.h
+++ b/lib/odp-execute-private.h
@@ -102,7 +102,7 @@  int32_t odp_execute_action_set(const char *name,
 int32_t odp_action_scalar_init(struct odp_execute_action_impl *self);
 
 /* Init function for the optimized with AVX512 actions. */
-int32_t action_avx512_init(void);
+int32_t action_avx512_init(struct odp_execute_action_impl *self);
 
 /* Probe function to check ISA requirements. */
 int32_t action_avx512_probe(void);