diff mbox series

[ovs-dev,v10,09/10] odp-execute: Add ISA implementation of set_masked ETH

Message ID 20220713182807.3416578-10-harry.van.haaren@intel.com
State Changes Requested
Headers show
Series Actions Infrastructure + Optimizations | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Van Haaren, Harry July 13, 2022, 6:28 p.m. UTC
From: Emma Finn <emma.finn@intel.com>

This commit includes infrastructure changes for enabling set_masked_X
actions and also adds support for the AVX512 implementation of the
eth_set_addrs action.

Signed-off-by: Emma Finn <emma.finn@intel.com>
---
 lib/odp-execute-avx512.c  | 90 +++++++++++++++++++++++++++++++++++++++
 lib/odp-execute-private.c | 14 ++++++
 lib/odp-execute-private.h |  3 ++
 lib/odp-execute.c         | 49 +++++++++++----------
 lib/odp-execute.h         |  3 ++
 5 files changed, 137 insertions(+), 22 deletions(-)

Comments

Kumar Amber July 14, 2022, 7:03 a.m. UTC | #1
Hey all,

Have tested the all the patches in the series.

<Snip>

Tested-by: Kumar Amber <kumar.amber@intel.com>

BR
Amber
Pai G, Sunil July 14, 2022, 10:06 a.m. UTC | #2
> -----Original Message-----
> From: Van Haaren, Harry <harry.van.haaren@intel.com>
> Sent: Wednesday, July 13, 2022 11:58 PM
> To: dev@openvswitch.org
> Cc: i.maximets@ovn.org; echaudro@redhat.com; Amber, Kumar
> <kumar.amber@intel.com>; Pai G, Sunil <sunil.pai.g@intel.com>; Finn, Emma
> <emma.finn@intel.com>; Stokes, Ian <ian.stokes@intel.com>
> Subject: [PATCH v10 09/10] odp-execute: Add ISA implementation of
> set_masked ETH
> 
> From: Emma Finn <emma.finn@intel.com>
> 
> This commit includes infrastructure changes for enabling set_masked_X
> actions and also adds support for the AVX512 implementation of the
> eth_set_addrs action.
> 
> Signed-off-by: Emma Finn <emma.finn@intel.com>
> ---
>  lib/odp-execute-avx512.c  | 90 +++++++++++++++++++++++++++++++++++++++
>  lib/odp-execute-private.c | 14 ++++++
>  lib/odp-execute-private.h |  3 ++
>  lib/odp-execute.c         | 49 +++++++++++----------
>  lib/odp-execute.h         |  3 ++
>  5 files changed, 137 insertions(+), 22 deletions(-)
> 

LGTM, 
Acked-by: Sunil Pai G <sunil.pai.g@intel.com>
Eelco Chaudron July 14, 2022, 1:23 p.m. UTC | #3
> From: Emma Finn <emma.finn@intel.com>
>
> This commit includes infrastructure changes for enabling set_masked_X
> actions and also adds support for the AVX512 implementation of the
> eth_set_addrs action.
>
> Signed-off-by: Emma Finn <emma.finn@intel.com>
> ---
>  lib/odp-execute-avx512.c  | 90 +++++++++++++++++++++++++++++++++++++++
>  lib/odp-execute-private.c | 14 ++++++
>  lib/odp-execute-private.h |  3 ++
>  lib/odp-execute.c         | 49 +++++++++++----------
>  lib/odp-execute.h         |  3 ++
>  5 files changed, 137 insertions(+), 22 deletions(-)
>
> diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c
> index 3449acff7..8ecdaecf6 100644
> --- a/lib/odp-execute-avx512.c
> +++ b/lib/odp-execute-avx512.c
> @@ -23,6 +23,7 @@
>
>  #include "dp-packet.h"
>  #include "immintrin.h"
> +#include "odp-execute.h"
>  #include "odp-execute-private.h"
>  #include "odp-netlink.h"
>  #include "openvswitch/vlog.h"
> @@ -50,6 +51,16 @@ BUILD_ASSERT_DECL(offsetof(struct dp_packet, l3_ofs) +
>  BUILD_ASSERT_DECL(sizeof(struct dp_packet) -
>                    offsetof(struct dp_packet, l2_pad_size) >= sizeof(__m128i));
>
> +/* The below build assert makes sure the order of the fields needed by
> + * the set masked functions shuffle operations do not change. This should not
> + * happen as these are defined under the Linux uapi. */
> +BUILD_ASSERT_DECL(offsetof(struct ovs_key_ethernet, eth_src) +
> +                  MEMBER_SIZEOF(struct ovs_key_ethernet, eth_src) ==
> +                  offsetof(struct ovs_key_ethernet, eth_dst));
> +
> +/* Array of callback functions, one for each masked operation. */
> +odp_execute_action_cb impl_set_masked_funcs[__OVS_KEY_ATTR_MAX];
> +
>  static inline void ALWAYS_INLINE
>  avx512_dp_packet_resize_l2(struct dp_packet *b, int resize_by_bytes)
>  {
> @@ -207,6 +218,80 @@ action_avx512_push_vlan(struct dp_packet_batch *batch, const struct nlattr *a)
>      }
>  }
>
> +/* This function performs the same operation on each packet in the batch as
> + * the scalar odp_eth_set_addrs() function. */
> +static void
> +action_avx512_eth_set_addrs(struct dp_packet_batch *batch,
> +                            const struct nlattr *a)
> +{
> +    const struct ovs_key_ethernet *key, *mask;
> +    struct dp_packet *packet;
> +
> +    a = nl_attr_get(a);
> +    key = nl_attr_get(a);
> +    mask = odp_get_key_mask(a, struct ovs_key_ethernet);
> +
> +    /* Read the content of the key(src) and mask in the respective registers.
> +     * We only load the src and dest addresses, which is only 96-bits and not
> +     * 128-bits. */
> +    __m128i v_src = _mm_maskz_loadu_epi32(0x7,(void *) key);
> +    __m128i v_mask = _mm_maskz_loadu_epi32(0x7, (void *) mask);

One question here I asked throughout the various revisions but got not answered:

"The second load, loads 128 bits of data, but there are only 12 bytes to load. What happens if the memory at the remaining 6 bytes are not mapped in memory (i.e. a page does not exist/can't be loaded)? Will we crash!?
Guess the key is fine, as we will read some bytes of the mask data."


<SNIP>

> +void
> +odp_execute_scalar_action(struct dp_packet_batch *batch,
> +                          const struct nlattr *action)
> +{
> +    enum ovs_action_attr type = nl_attr_type(action);
> +
> +    if (action_impls[ACTION_IMPL_SCALAR].funcs[type] &&
> +        type <= OVS_ACTION_ATTR_MAX) {

Guess the two checks above need to be reversed, i.e. the type <= OVS_ACTION_ATTR_MAX should be first.
> +
> +        action_impls[ACTION_IMPL_SCALAR].funcs[type](batch, action);
> +    }
> +}

<SNIP>

> +static void
> +action_set_masked(struct dp_packet_batch *batch, const struct nlattr *a)
> +{
> +   const struct nlattr *key = nl_attr_get(a);
> +   struct dp_packet *packet;
> +
> +    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> +        odp_execute_masked_set_action(packet, key);
> +    }

Indentation is off here.

<SNIP>

The rest of the patch looks good to me.

//Eelco
Van Haaren, Harry July 14, 2022, 2:11 p.m. UTC | #4
> -----Original Message-----
> From: Eelco Chaudron <echaudro@redhat.com>
> Sent: Thursday, July 14, 2022 2:24 PM
> To: Van Haaren, Harry <harry.van.haaren@intel.com>
> Cc: dev@openvswitch.org; i.maximets@ovn.org; Amber, Kumar
> <kumar.amber@intel.com>; Pai G, Sunil <sunil.pai.g@intel.com>; Finn, Emma
> <emma.finn@intel.com>; Stokes, Ian <ian.stokes@intel.com>
> Subject: Re: [PATCH v10 09/10] odp-execute: Add ISA implementation of set_masked
> ETH

<snip patch>

> > +    /* Read the content of the key(src) and mask in the respective registers.
> > +     * We only load the src and dest addresses, which is only 96-bits and not
> > +     * 128-bits. */
> > +    __m128i v_src = _mm_maskz_loadu_epi32(0x7,(void *) key);
> > +    __m128i v_mask = _mm_maskz_loadu_epi32(0x7, (void *) mask);
> 
> One question here I asked throughout the various revisions but got not answered:
> 
> "The second load, loads 128 bits of data, but there are only 12 bytes to load. What
> happens if the memory at the remaining 6 bytes are not mapped in memory (i.e. a
> page does not exist/can't be loaded)? Will we crash!?

AVX512 has some very nice features for handling scenarios where "not full" SIMD is 
required. This feature is known as "k-masks", and in short allows "turning off" part of
the SIMD instruction from having an effect.

In this case, the "maskz" part of the intrinsic means that the k-mask becomes active.
An extra parameter is added to any k-mask instruction (_mm_maskz_*), which indicates
what lanes to enable/disable. Note that the *size* of each lane is determined by the
end of the intrinsic, so _epi32() indicates 32-bit lanes. A worked example below:

_mm_maskz_loadu_epi32(0x7, (void *) mask);

kmask is 0x7, or "111" in binary, so lowest 3 lanes (visualize them on the right) are active.
As the instruction targets 32-bit ints, each lane size is 4 bytes, so 3 * 4 = 12 bytes "active".
As a result, only 12 bytes are loaded from memory here. Even if the next byte was on a new
page, and not mapped into our virtual address range, there would be no crash here due to
the k-mask handling the load.

<snip more patch>
Eelco Chaudron July 14, 2022, 2:33 p.m. UTC | #5
On 14 Jul 2022, at 16:11, Van Haaren, Harry wrote:

>> -----Original Message-----
>> From: Eelco Chaudron <echaudro@redhat.com>
>> Sent: Thursday, July 14, 2022 2:24 PM
>> To: Van Haaren, Harry <harry.van.haaren@intel.com>
>> Cc: dev@openvswitch.org; i.maximets@ovn.org; Amber, Kumar
>> <kumar.amber@intel.com>; Pai G, Sunil <sunil.pai.g@intel.com>; Finn, Emma
>> <emma.finn@intel.com>; Stokes, Ian <ian.stokes@intel.com>
>> Subject: Re: [PATCH v10 09/10] odp-execute: Add ISA implementation of set_masked
>> ETH
>
> <snip patch>
>
>>> +    /* Read the content of the key(src) and mask in the respective registers.
>>> +     * We only load the src and dest addresses, which is only 96-bits and not
>>> +     * 128-bits. */
>>> +    __m128i v_src = _mm_maskz_loadu_epi32(0x7,(void *) key);
>>> +    __m128i v_mask = _mm_maskz_loadu_epi32(0x7, (void *) mask);
>>
>> One question here I asked throughout the various revisions but got not answered:
>>
>> "The second load, loads 128 bits of data, but there are only 12 bytes to load. What
>> happens if the memory at the remaining 6 bytes are not mapped in memory (i.e. a
>> page does not exist/can't be loaded)? Will we crash!?
>
> AVX512 has some very nice features for handling scenarios where "not full" SIMD is
> required. This feature is known as "k-masks", and in short allows "turning off" part of
> the SIMD instruction from having an effect.
>
> In this case, the "maskz" part of the intrinsic means that the k-mask becomes active.
> An extra parameter is added to any k-mask instruction (_mm_maskz_*), which indicates
> what lanes to enable/disable. Note that the *size* of each lane is determined by the
> end of the intrinsic, so _epi32() indicates 32-bit lanes. A worked example below:
>
> _mm_maskz_loadu_epi32(0x7, (void *) mask);
>
> kmask is 0x7, or "111" in binary, so lowest 3 lanes (visualize them on the right) are active.
> As the instruction targets 32-bit ints, each lane size is 4 bytes, so 3 * 4 = 12 bytes "active".
> As a result, only 12 bytes are loaded from memory here. Even if the next byte was on a new
> page, and not mapped into our virtual address range, there would be no crash here due to
> the k-mask handling the load.
>
> <snip more patch>

Thanks, that really answers my question! I guess I should better read the pseudo code on the intrinsics guide :)

//Eelco
diff mbox series

Patch

diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c
index 3449acff7..8ecdaecf6 100644
--- a/lib/odp-execute-avx512.c
+++ b/lib/odp-execute-avx512.c
@@ -23,6 +23,7 @@ 
 
 #include "dp-packet.h"
 #include "immintrin.h"
+#include "odp-execute.h"
 #include "odp-execute-private.h"
 #include "odp-netlink.h"
 #include "openvswitch/vlog.h"
@@ -50,6 +51,16 @@  BUILD_ASSERT_DECL(offsetof(struct dp_packet, l3_ofs) +
 BUILD_ASSERT_DECL(sizeof(struct dp_packet) -
                   offsetof(struct dp_packet, l2_pad_size) >= sizeof(__m128i));
 
+/* The below build assert makes sure the order of the fields needed by
+ * the set masked functions shuffle operations do not change. This should not
+ * happen as these are defined under the Linux uapi. */
+BUILD_ASSERT_DECL(offsetof(struct ovs_key_ethernet, eth_src) +
+                  MEMBER_SIZEOF(struct ovs_key_ethernet, eth_src) ==
+                  offsetof(struct ovs_key_ethernet, eth_dst));
+
+/* Array of callback functions, one for each masked operation. */
+odp_execute_action_cb impl_set_masked_funcs[__OVS_KEY_ATTR_MAX];
+
 static inline void ALWAYS_INLINE
 avx512_dp_packet_resize_l2(struct dp_packet *b, int resize_by_bytes)
 {
@@ -207,6 +218,80 @@  action_avx512_push_vlan(struct dp_packet_batch *batch, const struct nlattr *a)
     }
 }
 
+/* This function performs the same operation on each packet in the batch as
+ * the scalar odp_eth_set_addrs() function. */
+static void
+action_avx512_eth_set_addrs(struct dp_packet_batch *batch,
+                            const struct nlattr *a)
+{
+    const struct ovs_key_ethernet *key, *mask;
+    struct dp_packet *packet;
+
+    a = nl_attr_get(a);
+    key = nl_attr_get(a);
+    mask = odp_get_key_mask(a, struct ovs_key_ethernet);
+
+    /* Read the content of the key(src) and mask in the respective registers.
+     * We only load the src and dest addresses, which is only 96-bits and not
+     * 128-bits. */
+    __m128i v_src = _mm_maskz_loadu_epi32(0x7,(void *) key);
+    __m128i v_mask = _mm_maskz_loadu_epi32(0x7, (void *) mask);
+
+
+    /* These shuffle masks are used below, and each position tells where to
+     * move the bytes to. So here, the fourth sixth byte in
+     * ovs_key_ethernet is moved to byte location 0 in v_src/v_mask.
+     * The seventh is moved to 1, etc., etc.
+     * This swap is needed to move the src and dest MAC addresses in the
+     * same order as in the ethernet packet. */
+    static const uint8_t eth_shuffle[16] = {
+        6, 7, 8, 9, 10, 11, 0, 1,
+        2, 3, 4, 5, 0xFF, 0xFF, 0xFF, 0xFF
+    };
+
+    /* Load the shuffle mask in v_shuf. */
+    __m128i v_shuf = _mm_loadu_si128((void *) eth_shuffle);
+
+    /* Swap the key/mask src and dest addresses to the ethernet order. */
+    v_src = _mm_shuffle_epi8(v_src, v_shuf);
+    v_mask = _mm_shuffle_epi8(v_mask, v_shuf);
+
+    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+
+        struct eth_header *eh = dp_packet_eth(packet);
+
+        if (!eh) {
+            continue;
+        }
+
+        /* Load the first 128-bits of the packet into the v_ether register. */
+        __m128i v_dst = _mm_loadu_si128((void *) eh);
+
+        /* AND the v_mask to the packet data (v_dst). */
+        __m128i dst_masked = _mm_andnot_si128(v_mask, v_dst);
+
+        /* OR the new addresses (v_src) with the masked packet addresses
+         * (dst_masked). */
+        __m128i res = _mm_or_si128(v_src, dst_masked);
+
+        /* Write back the modified ethernet addresses. */
+        _mm_storeu_si128((void *) eh, res);
+    }
+}
+
+static void
+action_avx512_set_masked(struct dp_packet_batch *batch, const struct nlattr *a)
+{
+    const struct nlattr *mask = nl_attr_get(a);
+    enum ovs_key_attr attr_type = nl_attr_type(mask);
+
+    if (attr_type <= OVS_KEY_ATTR_MAX && impl_set_masked_funcs[attr_type]) {
+        impl_set_masked_funcs[attr_type](batch, a);
+    } else {
+        odp_execute_scalar_action(batch, a);
+    }
+}
+
 int
 action_avx512_init(struct odp_execute_action_impl *self OVS_UNUSED)
 {
@@ -214,6 +299,11 @@  action_avx512_init(struct odp_execute_action_impl *self OVS_UNUSED)
      * are identified by OVS_ACTION_ATTR_*. */
     self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_avx512_pop_vlan;
     self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_avx512_push_vlan;
+    self->funcs[OVS_ACTION_ATTR_SET_MASKED] = action_avx512_set_masked;
+
+    /* Set function pointers for the individual operations supported by the
+     * SET_MASKED action. */
+    impl_set_masked_funcs[OVS_KEY_ATTR_ETHERNET] = action_avx512_eth_set_addrs;
 
     return 0;
 }
diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c
index 2fabf6c62..ec42d3d17 100644
--- a/lib/odp-execute-private.c
+++ b/lib/odp-execute-private.c
@@ -22,6 +22,7 @@ 
 #include "cpu.h"
 #include "dpdk.h"
 #include "dp-packet.h"
+#include "odp-execute.h"
 #include "odp-execute-private.h"
 #include "odp-netlink.h"
 #include "odp-util.h"
@@ -242,6 +243,19 @@  action_autoval_generic(struct dp_packet_batch *batch, const struct nlattr *a)
     dp_packet_delete_batch(&original_batch, true);
 }
 
+void
+odp_execute_scalar_action(struct dp_packet_batch *batch,
+                          const struct nlattr *action)
+{
+    enum ovs_action_attr type = nl_attr_type(action);
+
+    if (action_impls[ACTION_IMPL_SCALAR].funcs[type] &&
+        type <= OVS_ACTION_ATTR_MAX) {
+
+        action_impls[ACTION_IMPL_SCALAR].funcs[type](batch, action);
+    }
+}
+
 int
 action_autoval_init(struct odp_execute_action_impl *self)
 {
diff --git a/lib/odp-execute-private.h b/lib/odp-execute-private.h
index f66e6e6d1..b3707783f 100644
--- a/lib/odp-execute-private.h
+++ b/lib/odp-execute-private.h
@@ -100,4 +100,7 @@  int action_avx512_init(struct odp_execute_action_impl *self);
 
 void odp_execute_action_get_info(struct ds *name);
 
+void odp_execute_scalar_action(struct dp_packet_batch *batch,
+                               const struct nlattr *action);
+
 #endif /* ODP_EXTRACT_PRIVATE */
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index 0c5837640..dafb198bb 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -562,8 +562,6 @@  odp_execute_set_action(struct dp_packet *packet, const struct nlattr *a)
     }
 }
 
-#define get_mask(a, type) ((const type *)(const void *)(a + 1) + 1)
-
 static void
 odp_execute_masked_set_action(struct dp_packet *packet,
                               const struct nlattr *a)
@@ -575,17 +573,17 @@  odp_execute_masked_set_action(struct dp_packet *packet,
     switch (type) {
     case OVS_KEY_ATTR_PRIORITY:
         md->skb_priority = nl_attr_get_u32(a)
-            | (md->skb_priority & ~*get_mask(a, uint32_t));
+            | (md->skb_priority & ~*odp_get_key_mask(a, uint32_t));
         break;
 
     case OVS_KEY_ATTR_SKB_MARK:
         md->pkt_mark = nl_attr_get_u32(a)
-            | (md->pkt_mark & ~*get_mask(a, uint32_t));
+            | (md->pkt_mark & ~*odp_get_key_mask(a, uint32_t));
         break;
 
     case OVS_KEY_ATTR_ETHERNET:
         odp_eth_set_addrs(packet, nl_attr_get(a),
-                          get_mask(a, struct ovs_key_ethernet));
+                          odp_get_key_mask(a, struct ovs_key_ethernet));
         break;
 
     case OVS_KEY_ATTR_NSH: {
@@ -595,27 +593,27 @@  odp_execute_masked_set_action(struct dp_packet *packet,
 
     case OVS_KEY_ATTR_IPV4:
         odp_set_ipv4(packet, nl_attr_get(a),
-                     get_mask(a, struct ovs_key_ipv4));
+                     odp_get_key_mask(a, struct ovs_key_ipv4));
         break;
 
     case OVS_KEY_ATTR_IPV6:
         odp_set_ipv6(packet, nl_attr_get(a),
-                     get_mask(a, struct ovs_key_ipv6));
+                     odp_get_key_mask(a, struct ovs_key_ipv6));
         break;
 
     case OVS_KEY_ATTR_TCP:
         odp_set_tcp(packet, nl_attr_get(a),
-                    get_mask(a, struct ovs_key_tcp));
+                    odp_get_key_mask(a, struct ovs_key_tcp));
         break;
 
     case OVS_KEY_ATTR_UDP:
         odp_set_udp(packet, nl_attr_get(a),
-                    get_mask(a, struct ovs_key_udp));
+                    odp_get_key_mask(a, struct ovs_key_udp));
         break;
 
     case OVS_KEY_ATTR_SCTP:
         odp_set_sctp(packet, nl_attr_get(a),
-                     get_mask(a, struct ovs_key_sctp));
+                     odp_get_key_mask(a, struct ovs_key_sctp));
         break;
 
     case OVS_KEY_ATTR_MPLS:
@@ -623,33 +621,33 @@  odp_execute_masked_set_action(struct dp_packet *packet,
         if (mh) {
             put_16aligned_be32(&mh->mpls_lse, nl_attr_get_be32(a)
                                | (get_16aligned_be32(&mh->mpls_lse)
-                                  & ~*get_mask(a, ovs_be32)));
+                                  & ~*odp_get_key_mask(a, ovs_be32)));
         }
         break;
 
     case OVS_KEY_ATTR_ARP:
         set_arp(packet, nl_attr_get(a),
-                get_mask(a, struct ovs_key_arp));
+                odp_get_key_mask(a, struct ovs_key_arp));
         break;
 
     case OVS_KEY_ATTR_ND:
         odp_set_nd(packet, nl_attr_get(a),
-                   get_mask(a, struct ovs_key_nd));
+                   odp_get_key_mask(a, struct ovs_key_nd));
         break;
 
     case OVS_KEY_ATTR_ND_EXTENSIONS:
         odp_set_nd_ext(packet, nl_attr_get(a),
-                       get_mask(a, struct ovs_key_nd_extensions));
+                       odp_get_key_mask(a, struct ovs_key_nd_extensions));
         break;
 
     case OVS_KEY_ATTR_DP_HASH:
         md->dp_hash = nl_attr_get_u32(a)
-            | (md->dp_hash & ~*get_mask(a, uint32_t));
+            | (md->dp_hash & ~*odp_get_key_mask(a, uint32_t));
         break;
 
     case OVS_KEY_ATTR_RECIRC_ID:
         md->recirc_id = nl_attr_get_u32(a)
-            | (md->recirc_id & ~*get_mask(a, uint32_t));
+            | (md->recirc_id & ~*odp_get_key_mask(a, uint32_t));
         break;
 
     case OVS_KEY_ATTR_TUNNEL:    /* Masked data not supported for tunnel. */
@@ -857,6 +855,17 @@  action_push_vlan(struct dp_packet_batch *batch, const struct nlattr *a)
     }
 }
 
+static void
+action_set_masked(struct dp_packet_batch *batch, const struct nlattr *a)
+{
+   const struct nlattr *key = nl_attr_get(a);
+   struct dp_packet *packet;
+
+    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+        odp_execute_masked_set_action(packet, key);
+    }
+}
+
 /* Implementation of the scalar actions impl init function. Build up the
  * array of func ptrs here.
  */
@@ -867,6 +876,7 @@  odp_action_scalar_init(struct odp_execute_action_impl *self)
      * are identified by OVS_ACTION_ATTR_*. */
     self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_pop_vlan;
     self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_push_vlan;
+    self->funcs[OVS_ACTION_ATTR_SET_MASKED] = action_set_masked;
 
     return 0;
 }
@@ -1078,12 +1088,6 @@  odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
             }
             break;
 
-        case OVS_ACTION_ATTR_SET_MASKED:
-            DP_PACKET_BATCH_FOR_EACH(i, packet, batch) {
-                odp_execute_masked_set_action(packet, nl_attr_get(a));
-            }
-            break;
-
         case OVS_ACTION_ATTR_SAMPLE:
             DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
                 odp_execute_sample(dp, packet, steal && last_action, a,
@@ -1210,6 +1214,7 @@  odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
         /* The following actions are handled by the scalar implementation. */
         case OVS_ACTION_ATTR_POP_VLAN:
         case OVS_ACTION_ATTR_PUSH_VLAN:
+        case OVS_ACTION_ATTR_SET_MASKED:
             OVS_NOT_REACHED();
         }
 
diff --git a/lib/odp-execute.h b/lib/odp-execute.h
index 0921ee924..2ba1ec5d2 100644
--- a/lib/odp-execute.h
+++ b/lib/odp-execute.h
@@ -46,4 +46,7 @@  void odp_execute_actions(void *dp, struct dp_packet_batch *batch,
                          bool steal,
                          const struct nlattr *actions, size_t actions_len,
                          odp_execute_cb dp_execute_action);
+
+#define odp_get_key_mask(a, type) ((const type *)(const void *)(a + 1) + 1)
+
 #endif