@@ -22,6 +22,7 @@ Post-v2.16.0
actions implementation at build time.
* Add AVX512 implementation of actions.
* Add support for an AVX512 optimized version of pop_vlan action.
+ * Add support for an AVX512 optimized version of push_vlan action.
- Python:
* For SSL support, the use of the pyOpenSSL library has been replaced
with the native 'ssl' module.
@@ -39,10 +39,22 @@ BUILD_ASSERT_DECL(offsetof(struct dp_packet, l3_ofs) +
MEMBER_SIZEOF(struct dp_packet, l3_ofs) ==
offsetof(struct dp_packet, l4_ofs));
+enum OPERATION {
+ OP_ADD,
+ OP_SUB,
+};
+
static inline void ALWAYS_INLINE
-avx512_dp_packet_resize_l2(struct dp_packet *b, int increment)
+avx512_dp_packet_resize_l2(struct dp_packet *b, int increment,
+ enum OPERATION op)
{
/* update packet size/data pointers */
+ if (increment >= 0) {
+ dp_packet_prealloc_headroom(b, increment);
+ } else {
+ ovs_assert(dp_packet_size(b) - dp_packet_l2_pad_size(b) >= -increment);
+ }
+
dp_packet_set_data(b, (char *) dp_packet_data(b) - increment);
dp_packet_set_size(b, dp_packet_size(b) + increment);
@@ -50,9 +62,9 @@ avx512_dp_packet_resize_l2(struct dp_packet *b, int increment)
const __m128i v_zeros = _mm_setzero_si128();
const __m128i v_u16_max = _mm_cmpeq_epi16(v_zeros, v_zeros);
- /* Only these lanes can be incremented for push-VLAN action. */
+ /* Only these lanes can be incremented/decremented for L2. */
const uint8_t k_lanes = 0b1110;
- __m128i v_offset = _mm_set1_epi16(VLAN_HEADER_LEN);
+ __m128i v_offset = _mm_set1_epi16(abs(increment));
/* Load packet and compare with UINT16_MAX */
void *adjust_ptr = &b->l2_pad_size;
@@ -60,9 +72,19 @@ avx512_dp_packet_resize_l2(struct dp_packet *b, int increment)
__mmask8 k_cmp = _mm_mask_cmpneq_epu16_mask(k_lanes, v_adjust_src,
v_u16_max);
- /* Add VLAN_HEADER_LEN using compare mask, store results. */
- __m128i v_adjust_wip = _mm_mask_sub_epi16(v_adjust_src, k_cmp,
- v_adjust_src, v_offset);
+ /* Update VLAN_HEADER_LEN using compare mask, store results. */
+ __m128i v_adjust_wip;
+ switch (op) {
+ case OP_ADD:
+ v_adjust_wip = _mm_mask_add_epi16(v_adjust_src, k_cmp,
+ v_adjust_src, v_offset);
+ break;
+ case OP_SUB:
+ v_adjust_wip = _mm_mask_sub_epi16(v_adjust_src, k_cmp,
+ v_adjust_src, v_offset);
+ break;
+ }
+
_mm_storeu_si128(adjust_ptr, v_adjust_wip);
}
@@ -79,8 +101,7 @@ avx512_eth_pop_vlan(struct dp_packet *packet)
__m128i v_realign = _mm_alignr_epi8(v_ether, _mm_setzero_si128(),
16 - VLAN_HEADER_LEN);
_mm_storeu_si128((void *) veh, v_realign);
- avx512_dp_packet_resize_l2(packet, -VLAN_HEADER_LEN);
-
+ avx512_dp_packet_resize_l2(packet, -VLAN_HEADER_LEN, OP_SUB);
}
}
@@ -96,6 +117,42 @@ action_avx512_pop_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
}
}
+static inline void ALWAYS_INLINE
+avx512_eth_push_vlan(struct dp_packet *packet, ovs_be16 tpid, ovs_be16 tci)
+{
+ avx512_dp_packet_resize_l2(packet, VLAN_HEADER_LEN, OP_ADD);
+
+ /* Build up the VLAN TCI/TPID, and merge with the moving of Ether. */
+ char *new_pkt_data = (char *) dp_packet_data(packet);
+
+ const uint16_t tci_proc = tci & htons(~VLAN_CFI);
+ const uint32_t tpid_tci = (tci_proc << 16) | tpid;
+ __m128i v_ether = _mm_loadu_si128((void *) new_pkt_data);
+
+ static const uint8_t vlan_push_shuffle_mask[16] = {
+ 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF
+ };
+
+ __m128i v_index = _mm_loadu_si128((void *) vlan_push_shuffle_mask);
+ __m128i v_shift = _mm_shuffle_epi8(v_ether, v_index);
+ __m128i v_vlan_hdr = _mm_insert_epi32(v_shift, tpid_tci, 3);
+ _mm_storeu_si128((void *) new_pkt_data, v_vlan_hdr);
+}
+
+static void
+action_avx512_push_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
+ const struct nlattr *a,
+ bool should_steal OVS_UNUSED)
+{
+ struct dp_packet *packet;
+ const struct ovs_action_push_vlan *vlan = nl_attr_get(a);
+
+ DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+ avx512_eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci);
+ }
+}
+
/* Probe functions to check ISA requirements. */
static int32_t
avx512_isa_probe(uint32_t needs_vbmi)
@@ -136,6 +193,7 @@ action_avx512_init(struct odp_execute_action_impl *self)
{
avx512_isa_probe(0);
self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_avx512_pop_vlan;
+ self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_avx512_push_vlan;
return 0;
}
@@ -218,6 +218,7 @@ int32_t
action_autoval_init(struct odp_execute_action_impl *self)
{
self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_autoval_generic;
+ self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_autoval_generic;
return 0;
}
@@ -842,6 +842,19 @@ action_pop_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
}
}
+static void
+action_push_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
+ const struct nlattr *a OVS_UNUSED,
+ bool should_steal OVS_UNUSED)
+{
+ struct dp_packet *packet;
+ const struct ovs_action_push_vlan *vlan = nl_attr_get(a);
+
+ DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+ eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci);
+ }
+}
+
/* Implementation of the scalar actions impl init function. Build up the
* array of func ptrs here.
*/
@@ -849,6 +862,7 @@ int32_t
odp_action_scalar_init(struct odp_execute_action_impl *self)
{
self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_pop_vlan;
+ self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_push_vlan;
return 0;
}
@@ -991,15 +1005,6 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
break;
}
- case OVS_ACTION_ATTR_PUSH_VLAN: {
- const struct ovs_action_push_vlan *vlan = nl_attr_get(a);
-
- DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
- eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci);
- }
- break;
- }
-
case OVS_ACTION_ATTR_PUSH_MPLS: {
const struct ovs_action_push_mpls *mpls = nl_attr_get(a);
@@ -1133,6 +1138,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
case OVS_ACTION_ATTR_OUTPUT:
case OVS_ACTION_ATTR_LB_OUTPUT:
case OVS_ACTION_ATTR_POP_VLAN:
+ case OVS_ACTION_ATTR_PUSH_VLAN:
case OVS_ACTION_ATTR_TUNNEL_PUSH:
case OVS_ACTION_ATTR_TUNNEL_POP:
case OVS_ACTION_ATTR_USERSPACE:
This commit adds the AVX512 implementation of the push_vlan action. The implementation here is auto-validated by the miniflow extract autovalidator, hence its correctness can be easily tested and verified. Signed-off-by: Emma Finn <emma.finn@intel.com> --- NEWS | 1 + lib/odp-execute-avx512.c | 74 ++++++++++++++++++++++++++++++++++----- lib/odp-execute-private.c | 1 + lib/odp-execute.c | 24 ++++++++----- 4 files changed, 83 insertions(+), 17 deletions(-)