diff mbox series

[ovs-dev,v6,09/11] odp-execute: Add ISA implementation of push_vlan action.

Message ID 20220510142202.1087967-10-emma.finn@intel.com
State Changes Requested
Headers show
Series Actions Infrastructure + Optimizations | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Finn, Emma May 10, 2022, 2:22 p.m. UTC
This commit adds the AVX512 implementation of the
push_vlan action.

Signed-off-by: Emma Finn <emma.finn@intel.com>
---
 lib/odp-execute-avx512.c  | 56 ++++++++++++++++++++++++++++++++++++---
 lib/odp-execute-private.c |  1 +
 lib/odp-execute.c         | 24 ++++++++++-------
 3 files changed, 69 insertions(+), 12 deletions(-)

Comments

Eelco Chaudron June 2, 2022, 2:36 p.m. UTC | #1
On 10 May 2022, at 16:22, Emma Finn wrote:

> This commit adds the AVX512 implementation of the
> push_vlan action.
>
> Signed-off-by: Emma Finn <emma.finn@intel.com>
> ---
>  lib/odp-execute-avx512.c  | 56 ++++++++++++++++++++++++++++++++++++---
>  lib/odp-execute-private.c |  1 +
>  lib/odp-execute.c         | 24 ++++++++++-------
>  3 files changed, 69 insertions(+), 12 deletions(-)
>
> diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c
> index 637956236..5d095f867 100644
> --- a/lib/odp-execute-avx512.c
> +++ b/lib/odp-execute-avx512.c
> @@ -42,6 +42,13 @@ static inline void ALWAYS_INLINE
>  avx512_dp_packet_resize_l2(struct dp_packet *b, int resize_by_bytes)
>  {
>      /* Update packet size/data pointers */
> +    if (resize_by_bytes >= 0) {
> +        dp_packet_prealloc_headroom(b, resize_by_bytes);
> +    } else {
> +        ovs_assert(dp_packet_size(b) - dp_packet_l2_pad_size(b) >=
> +                    -resize_by_bytes);
> +    }
> +
>      dp_packet_set_data(b, (char *) dp_packet_data(b) - resize_by_bytes);
>      dp_packet_set_size(b, dp_packet_size(b) + resize_by_bytes);

See previous patch, please make this first part equal to dp_packet_resize_l2_5()


> @@ -51,7 +58,7 @@ avx512_dp_packet_resize_l2(struct dp_packet *b, int resize_by_bytes)
>
>      /* Only these lanes can be incremented/decremented for L2. */
>      const uint8_t k_lanes = 0b1110;
> -    __m128i v_offset = _mm_set1_epi16(VLAN_HEADER_LEN);
> +    __m128i v_offset = _mm_set1_epi16(abs(resize_by_bytes));

Guess this should be part of the previous patch (all changes in this function).
>
>      /* Load packet and compare with UINT16_MAX */
>      void *adjust_ptr = &b->l2_pad_size;
> @@ -63,8 +70,16 @@ avx512_dp_packet_resize_l2(struct dp_packet *b, int resize_by_bytes)
>                                                      v_u16_max);
>
>      /* Update VLAN_HEADER_LEN using compare mask, store results. */
> -    __m128i v_adjust_wip = _mm_mask_sub_epi16(v_adjust_src, k_cmp,
> -                                              v_adjust_src, v_offset);
> +    __m128i v_adjust_wip;
> +
> +    if (resize_by_bytes >= 0) {
> +        v_adjust_wip = _mm_mask_add_epi16(v_adjust_src, k_cmp,
> +                                            v_adjust_src, v_offset);

Alignment

> +    } else {
> +        v_adjust_wip = _mm_mask_sub_epi16(v_adjust_src, k_cmp,
> +                                            v_adjust_src, v_offset);

Alignment

> +    }
> +
>      _mm_storeu_si128(adjust_ptr, v_adjust_wip);
>  }
>
> @@ -90,6 +105,40 @@ action_avx512_pop_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
>      }
>  }
>
> +static void
> +action_avx512_push_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
> +                       const struct nlattr *a,
> +                       bool should_steal OVS_UNUSED)

Alignment

> +{
> +    struct dp_packet *packet;
> +    const struct ovs_action_push_vlan *vlan = nl_attr_get(a);
> +    ovs_be16 tpid, tci;
> +
> +    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> +        tpid = vlan->vlan_tpid;
> +        tci = vlan->vlan_tci;
> +
> +        avx512_dp_packet_resize_l2(packet, VLAN_HEADER_LEN);
> +
> +        /* Build up the VLAN TCI/TPID, and merge with the moving of Ether. */
> +        char *pkt_data = (char *) dp_packet_data(packet);
> +        const uint16_t tci_proc = tci & htons(~VLAN_CFI);
> +        const uint32_t tpid_tci = (tci_proc << 16) | tpid;
> +
> +        static const uint8_t vlan_push_shuffle_mask[16] = {
> +        4, 5, 6, 7, 8, 9, 10, 11,
> +        12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF
> +        };
> +

Please add comments so that people can understand what is happening without having to read Intel's Intrinsics Guide.

> +        __m128i v_ether = _mm_loadu_si128((void *) pkt_data);
> +        __m128i v_index = _mm_loadu_si128((void *) vlan_push_shuffle_mask);
> +        __m128i v_shift = _mm_shuffle_epi8(v_ether, v_index);
> +        __m128i v_vlan_hdr = _mm_insert_epi32(v_shift, tpid_tci, 3);
> +        _mm_storeu_si128((void *) pkt_data, v_vlan_hdr);
> +

Remove extra line

> +    }
> +}
> +
>  /* Probe functions to check ISA requirements. */
>  static int32_t
>  avx512_isa_probe(uint32_t needs_vbmi)
> @@ -130,6 +179,7 @@ action_avx512_init(struct odp_execute_action_impl *self)
>  {
>      avx512_isa_probe(0);
>      self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_avx512_pop_vlan;
> +    self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_avx512_push_vlan;
>
>      return 0;
>  }
> diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c
> index 8257bba80..6b09ad353 100644
> --- a/lib/odp-execute-private.c
> +++ b/lib/odp-execute-private.c
> @@ -228,6 +228,7 @@ int32_t
>  action_autoval_init(struct odp_execute_action_impl *self)
>  {
>      self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_autoval_generic;
> +    self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_autoval_generic;
>
>      return 0;
>  }
> diff --git a/lib/odp-execute.c b/lib/odp-execute.c
> index ba532101f..690e7e1ce 100644
> --- a/lib/odp-execute.c
> +++ b/lib/odp-execute.c
> @@ -845,6 +845,19 @@ action_pop_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
>      }
>  }
>
> +static void
> +action_push_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
> +                const struct nlattr *a OVS_UNUSED,
> +                bool should_steal OVS_UNUSED)
> +{
> +    struct dp_packet *packet;
> +    const struct ovs_action_push_vlan *vlan = nl_attr_get(a);
> +
> +    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> +        eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci);
> +    }
> +}
> +
>  /* Implementation of the scalar actions impl init function. Build up the
>   * array of func ptrs here.
>   */
> @@ -852,6 +865,7 @@ int32_t
>  odp_action_scalar_init(struct odp_execute_action_impl *self)
>  {
>      self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_pop_vlan;
> +    self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_push_vlan;
>
>      return 0;
>  }
> @@ -994,15 +1008,6 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
>              break;
>          }
>
> -        case OVS_ACTION_ATTR_PUSH_VLAN: {
> -            const struct ovs_action_push_vlan *vlan = nl_attr_get(a);
> -
> -            DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> -                eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci);
> -            }
> -            break;
> -        }
> -
>          case OVS_ACTION_ATTR_PUSH_MPLS: {
>              const struct ovs_action_push_mpls *mpls = nl_attr_get(a);
>
> @@ -1147,6 +1152,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
>          case OVS_ACTION_ATTR_OUTPUT:
>          case OVS_ACTION_ATTR_LB_OUTPUT:
>          case OVS_ACTION_ATTR_POP_VLAN:
> +        case OVS_ACTION_ATTR_PUSH_VLAN:

Move this to the special, to be created section of scalar actions.

>          case OVS_ACTION_ATTR_TUNNEL_PUSH:
>          case OVS_ACTION_ATTR_TUNNEL_POP:
>          case OVS_ACTION_ATTR_USERSPACE:
> -- 
> 2.25.1
diff mbox series

Patch

diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c
index 637956236..5d095f867 100644
--- a/lib/odp-execute-avx512.c
+++ b/lib/odp-execute-avx512.c
@@ -42,6 +42,13 @@  static inline void ALWAYS_INLINE
 avx512_dp_packet_resize_l2(struct dp_packet *b, int resize_by_bytes)
 {
     /* Update packet size/data pointers */
+    if (resize_by_bytes >= 0) {
+        dp_packet_prealloc_headroom(b, resize_by_bytes);
+    } else {
+        ovs_assert(dp_packet_size(b) - dp_packet_l2_pad_size(b) >=
+                    -resize_by_bytes);
+    }
+
     dp_packet_set_data(b, (char *) dp_packet_data(b) - resize_by_bytes);
     dp_packet_set_size(b, dp_packet_size(b) + resize_by_bytes);
 
@@ -51,7 +58,7 @@  avx512_dp_packet_resize_l2(struct dp_packet *b, int resize_by_bytes)
 
     /* Only these lanes can be incremented/decremented for L2. */
     const uint8_t k_lanes = 0b1110;
-    __m128i v_offset = _mm_set1_epi16(VLAN_HEADER_LEN);
+    __m128i v_offset = _mm_set1_epi16(abs(resize_by_bytes));
 
     /* Load packet and compare with UINT16_MAX */
     void *adjust_ptr = &b->l2_pad_size;
@@ -63,8 +70,16 @@  avx512_dp_packet_resize_l2(struct dp_packet *b, int resize_by_bytes)
                                                     v_u16_max);
 
     /* Update VLAN_HEADER_LEN using compare mask, store results. */
-    __m128i v_adjust_wip = _mm_mask_sub_epi16(v_adjust_src, k_cmp,
-                                              v_adjust_src, v_offset);
+    __m128i v_adjust_wip;
+
+    if (resize_by_bytes >= 0) {
+        v_adjust_wip = _mm_mask_add_epi16(v_adjust_src, k_cmp,
+                                            v_adjust_src, v_offset);
+    } else {
+        v_adjust_wip = _mm_mask_sub_epi16(v_adjust_src, k_cmp,
+                                            v_adjust_src, v_offset);
+    }
+
     _mm_storeu_si128(adjust_ptr, v_adjust_wip);
 }
 
@@ -90,6 +105,40 @@  action_avx512_pop_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
     }
 }
 
+static void
+action_avx512_push_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
+                       const struct nlattr *a,
+                       bool should_steal OVS_UNUSED)
+{
+    struct dp_packet *packet;
+    const struct ovs_action_push_vlan *vlan = nl_attr_get(a);
+    ovs_be16 tpid, tci;
+
+    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+        tpid = vlan->vlan_tpid;
+        tci = vlan->vlan_tci;
+
+        avx512_dp_packet_resize_l2(packet, VLAN_HEADER_LEN);
+
+        /* Build up the VLAN TCI/TPID, and merge with the moving of Ether. */
+        char *pkt_data = (char *) dp_packet_data(packet);
+        const uint16_t tci_proc = tci & htons(~VLAN_CFI);
+        const uint32_t tpid_tci = (tci_proc << 16) | tpid;
+
+        static const uint8_t vlan_push_shuffle_mask[16] = {
+        4, 5, 6, 7, 8, 9, 10, 11,
+        12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF
+        };
+
+        __m128i v_ether = _mm_loadu_si128((void *) pkt_data);
+        __m128i v_index = _mm_loadu_si128((void *) vlan_push_shuffle_mask);
+        __m128i v_shift = _mm_shuffle_epi8(v_ether, v_index);
+        __m128i v_vlan_hdr = _mm_insert_epi32(v_shift, tpid_tci, 3);
+        _mm_storeu_si128((void *) pkt_data, v_vlan_hdr);
+
+    }
+}
+
 /* Probe functions to check ISA requirements. */
 static int32_t
 avx512_isa_probe(uint32_t needs_vbmi)
@@ -130,6 +179,7 @@  action_avx512_init(struct odp_execute_action_impl *self)
 {
     avx512_isa_probe(0);
     self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_avx512_pop_vlan;
+    self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_avx512_push_vlan;
 
     return 0;
 }
diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c
index 8257bba80..6b09ad353 100644
--- a/lib/odp-execute-private.c
+++ b/lib/odp-execute-private.c
@@ -228,6 +228,7 @@  int32_t
 action_autoval_init(struct odp_execute_action_impl *self)
 {
     self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_autoval_generic;
+    self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_autoval_generic;
 
     return 0;
 }
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index ba532101f..690e7e1ce 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -845,6 +845,19 @@  action_pop_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
     }
 }
 
+static void
+action_push_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
+                const struct nlattr *a OVS_UNUSED,
+                bool should_steal OVS_UNUSED)
+{
+    struct dp_packet *packet;
+    const struct ovs_action_push_vlan *vlan = nl_attr_get(a);
+
+    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+        eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci);
+    }
+}
+
 /* Implementation of the scalar actions impl init function. Build up the
  * array of func ptrs here.
  */
@@ -852,6 +865,7 @@  int32_t
 odp_action_scalar_init(struct odp_execute_action_impl *self)
 {
     self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_pop_vlan;
+    self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_push_vlan;
 
     return 0;
 }
@@ -994,15 +1008,6 @@  odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
             break;
         }
 
-        case OVS_ACTION_ATTR_PUSH_VLAN: {
-            const struct ovs_action_push_vlan *vlan = nl_attr_get(a);
-
-            DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
-                eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci);
-            }
-            break;
-        }
-
         case OVS_ACTION_ATTR_PUSH_MPLS: {
             const struct ovs_action_push_mpls *mpls = nl_attr_get(a);
 
@@ -1147,6 +1152,7 @@  odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
         case OVS_ACTION_ATTR_OUTPUT:
         case OVS_ACTION_ATTR_LB_OUTPUT:
         case OVS_ACTION_ATTR_POP_VLAN:
+        case OVS_ACTION_ATTR_PUSH_VLAN:
         case OVS_ACTION_ATTR_TUNNEL_PUSH:
         case OVS_ACTION_ATTR_TUNNEL_POP:
         case OVS_ACTION_ATTR_USERSPACE: