Message ID | 20220112094244.81402-9-emma.finn@intel.com |
---|---|
State | Superseded |
Headers | show |
Series | Actions Infrastructure + Optimizations | expand |
> This commit adds the AVX512 implementation of the push_vlan action. > The implementation here is auto-validated by the miniflow > extract autovalidator, hence its correctness can be easily > tested and verified. > > Signed-off-by: Emma Finn <emma.finn@intel.com> Hi Emma, thanks for the patch. I think given the conversation on patch 7 of this series would help me work through this better. Overall I understand the majority of the new action logic and design but just have a few questions that we can discuss on patch 7 that will no doubt help me complete review here. Thanks Ian > --- > lib/odp-execute-avx512.c | 62 +++++++++++++++++++++++++++++++++++---- > lib/odp-execute-private.c | 1 + > lib/odp-execute.c | 24 +++++++++------ > 3 files changed, 72 insertions(+), 15 deletions(-) > > diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c > index fcf27f070..03c0fd446 100644 > --- a/lib/odp-execute-avx512.c > +++ b/lib/odp-execute-avx512.c > @@ -43,6 +43,13 @@ static inline void ALWAYS_INLINE > avx512_dp_packet_resize_l2(struct dp_packet *b, int resize_by_bytes) > { > /* update packet size/data pointers */ > + if (resize_by_bytes >= 0) { > + dp_packet_prealloc_headroom(b, resize_by_bytes); > + } else { > + ovs_assert(dp_packet_size(b) - dp_packet_l2_pad_size(b) >= > + -resize_by_bytes); > + } > + > dp_packet_set_data(b, (char *) dp_packet_data(b) - resize_by_bytes); > dp_packet_set_size(b, dp_packet_size(b) + resize_by_bytes); > > @@ -50,9 +57,9 @@ avx512_dp_packet_resize_l2(struct dp_packet *b, int > resize_by_bytes) > const __m128i v_zeros = _mm_setzero_si128(); > const __m128i v_u16_max = _mm_cmpeq_epi16(v_zeros, v_zeros); > > - /* Only these lanes can be incremented for push-VLAN action. */ > + /* Only these lanes can be incremented/decremented for L2. */ > const uint8_t k_lanes = 0b1110; > - __m128i v_offset = _mm_set1_epi16(VLAN_HEADER_LEN); > + __m128i v_offset = _mm_set1_epi16(abs(resize_by_bytes)); > > /* Load packet and compare with UINT16_MAX */ > void *adjust_ptr = &b->l2_pad_size; > @@ -60,9 +67,17 @@ avx512_dp_packet_resize_l2(struct dp_packet *b, int > resize_by_bytes) > __mmask8 k_cmp = _mm_mask_cmpneq_epu16_mask(k_lanes, v_adjust_src, > v_u16_max); > > - /* Add VLAN_HEADER_LEN using compare mask, store results. */ > - __m128i v_adjust_wip = _mm_mask_sub_epi16(v_adjust_src, k_cmp, > - v_adjust_src, v_offset); > + /* Update VLAN_HEADER_LEN using compare mask, store results. */ > + __m128i v_adjust_wip; > + > + if (resize_by_bytes >= 0) { > + v_adjust_wip = _mm_mask_add_epi16(v_adjust_src, k_cmp, > + v_adjust_src, v_offset); > + } else { > + v_adjust_wip = _mm_mask_sub_epi16(v_adjust_src, k_cmp, > + v_adjust_src, v_offset); > + } > + > _mm_storeu_si128(adjust_ptr, v_adjust_wip); > > } > @@ -80,7 +95,6 @@ avx512_eth_pop_vlan(struct dp_packet *packet) > 16 - VLAN_HEADER_LEN); > _mm_storeu_si128((void *) veh, v_realign); > avx512_dp_packet_resize_l2(packet, -VLAN_HEADER_LEN); > - > } > } > > @@ -96,6 +110,41 @@ action_avx512_pop_vlan(void *dp OVS_UNUSED, struct > dp_packet_batch *batch, > } > } > > +static inline void ALWAYS_INLINE > +avx512_eth_push_vlan(struct dp_packet *packet, ovs_be16 tpid, ovs_be16 tci) > +{ > + avx512_dp_packet_resize_l2(packet, VLAN_HEADER_LEN); > + > + /* Build up the VLAN TCI/TPID, and merge with the moving of Ether. */ > + char *pkt_data = (char *) dp_packet_data(packet); > + const uint16_t tci_proc = tci & htons(~VLAN_CFI); > + const uint32_t tpid_tci = (tci_proc << 16) | tpid; > + > + static const uint8_t vlan_push_shuffle_mask[16] = { > + 4, 5, 6, 7, 8, 9, 10, 11, > + 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF > + }; > + > + __m128i v_ether = _mm_loadu_si128((void *) pkt_data); > + __m128i v_index = _mm_loadu_si128((void *) vlan_push_shuffle_mask); > + __m128i v_shift = _mm_shuffle_epi8(v_ether, v_index); > + __m128i v_vlan_hdr = _mm_insert_epi32(v_shift, tpid_tci, 3); > + _mm_storeu_si128((void *) pkt_data, v_vlan_hdr); > +} > + > +static void > +action_avx512_push_vlan(void *dp OVS_UNUSED, struct dp_packet_batch > *batch, > + const struct nlattr *a, > + bool should_steal OVS_UNUSED) > +{ > + struct dp_packet *packet; > + const struct ovs_action_push_vlan *vlan = nl_attr_get(a); > + > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > + avx512_eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci); > + } > +} > + > /* Probe functions to check ISA requirements. */ > static int32_t > avx512_isa_probe(uint32_t needs_vbmi) > @@ -136,6 +185,7 @@ action_avx512_init(struct odp_execute_action_impl > *self) > { > avx512_isa_probe(0); > self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_avx512_pop_vlan; > + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_avx512_push_vlan; > > return 0; > } > diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c > index 175a80159..607f0fa94 100644 > --- a/lib/odp-execute-private.c > +++ b/lib/odp-execute-private.c > @@ -218,6 +218,7 @@ int32_t > action_autoval_init(struct odp_execute_action_impl *self) > { > self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_autoval_generic; > + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_autoval_generic; > > return 0; > } > diff --git a/lib/odp-execute.c b/lib/odp-execute.c > index 1bc9fae09..40f71fa96 100644 > --- a/lib/odp-execute.c > +++ b/lib/odp-execute.c > @@ -842,6 +842,19 @@ action_pop_vlan(void *dp OVS_UNUSED, struct > dp_packet_batch *batch, > } > } > > +static void > +action_push_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch, > + const struct nlattr *a OVS_UNUSED, > + bool should_steal OVS_UNUSED) > +{ > + struct dp_packet *packet; > + const struct ovs_action_push_vlan *vlan = nl_attr_get(a); > + > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > + eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci); > + } > +} > + > /* Implementation of the scalar actions impl init function. Build up the > * array of func ptrs here. > */ > @@ -849,6 +862,7 @@ int32_t > odp_action_scalar_init(struct odp_execute_action_impl *self) > { > self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_pop_vlan; > + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_push_vlan; > > return 0; > } > @@ -991,15 +1005,6 @@ odp_execute_actions(void *dp, struct > dp_packet_batch *batch, bool steal, > break; > } > > - case OVS_ACTION_ATTR_PUSH_VLAN: { > - const struct ovs_action_push_vlan *vlan = nl_attr_get(a); > - > - DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > - eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci); > - } > - break; > - } > - > case OVS_ACTION_ATTR_PUSH_MPLS: { > const struct ovs_action_push_mpls *mpls = nl_attr_get(a); > > @@ -1133,6 +1138,7 @@ odp_execute_actions(void *dp, struct > dp_packet_batch *batch, bool steal, > case OVS_ACTION_ATTR_OUTPUT: > case OVS_ACTION_ATTR_LB_OUTPUT: > case OVS_ACTION_ATTR_POP_VLAN: > + case OVS_ACTION_ATTR_PUSH_VLAN: > case OVS_ACTION_ATTR_TUNNEL_PUSH: > case OVS_ACTION_ATTR_TUNNEL_POP: > case OVS_ACTION_ATTR_USERSPACE: > -- > 2.25.1
diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c index fcf27f070..03c0fd446 100644 --- a/lib/odp-execute-avx512.c +++ b/lib/odp-execute-avx512.c @@ -43,6 +43,13 @@ static inline void ALWAYS_INLINE avx512_dp_packet_resize_l2(struct dp_packet *b, int resize_by_bytes) { /* update packet size/data pointers */ + if (resize_by_bytes >= 0) { + dp_packet_prealloc_headroom(b, resize_by_bytes); + } else { + ovs_assert(dp_packet_size(b) - dp_packet_l2_pad_size(b) >= + -resize_by_bytes); + } + dp_packet_set_data(b, (char *) dp_packet_data(b) - resize_by_bytes); dp_packet_set_size(b, dp_packet_size(b) + resize_by_bytes); @@ -50,9 +57,9 @@ avx512_dp_packet_resize_l2(struct dp_packet *b, int resize_by_bytes) const __m128i v_zeros = _mm_setzero_si128(); const __m128i v_u16_max = _mm_cmpeq_epi16(v_zeros, v_zeros); - /* Only these lanes can be incremented for push-VLAN action. */ + /* Only these lanes can be incremented/decremented for L2. */ const uint8_t k_lanes = 0b1110; - __m128i v_offset = _mm_set1_epi16(VLAN_HEADER_LEN); + __m128i v_offset = _mm_set1_epi16(abs(resize_by_bytes)); /* Load packet and compare with UINT16_MAX */ void *adjust_ptr = &b->l2_pad_size; @@ -60,9 +67,17 @@ avx512_dp_packet_resize_l2(struct dp_packet *b, int resize_by_bytes) __mmask8 k_cmp = _mm_mask_cmpneq_epu16_mask(k_lanes, v_adjust_src, v_u16_max); - /* Add VLAN_HEADER_LEN using compare mask, store results. */ - __m128i v_adjust_wip = _mm_mask_sub_epi16(v_adjust_src, k_cmp, - v_adjust_src, v_offset); + /* Update VLAN_HEADER_LEN using compare mask, store results. */ + __m128i v_adjust_wip; + + if (resize_by_bytes >= 0) { + v_adjust_wip = _mm_mask_add_epi16(v_adjust_src, k_cmp, + v_adjust_src, v_offset); + } else { + v_adjust_wip = _mm_mask_sub_epi16(v_adjust_src, k_cmp, + v_adjust_src, v_offset); + } + _mm_storeu_si128(adjust_ptr, v_adjust_wip); } @@ -80,7 +95,6 @@ avx512_eth_pop_vlan(struct dp_packet *packet) 16 - VLAN_HEADER_LEN); _mm_storeu_si128((void *) veh, v_realign); avx512_dp_packet_resize_l2(packet, -VLAN_HEADER_LEN); - } } @@ -96,6 +110,41 @@ action_avx512_pop_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch, } } +static inline void ALWAYS_INLINE +avx512_eth_push_vlan(struct dp_packet *packet, ovs_be16 tpid, ovs_be16 tci) +{ + avx512_dp_packet_resize_l2(packet, VLAN_HEADER_LEN); + + /* Build up the VLAN TCI/TPID, and merge with the moving of Ether. */ + char *pkt_data = (char *) dp_packet_data(packet); + const uint16_t tci_proc = tci & htons(~VLAN_CFI); + const uint32_t tpid_tci = (tci_proc << 16) | tpid; + + static const uint8_t vlan_push_shuffle_mask[16] = { + 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF + }; + + __m128i v_ether = _mm_loadu_si128((void *) pkt_data); + __m128i v_index = _mm_loadu_si128((void *) vlan_push_shuffle_mask); + __m128i v_shift = _mm_shuffle_epi8(v_ether, v_index); + __m128i v_vlan_hdr = _mm_insert_epi32(v_shift, tpid_tci, 3); + _mm_storeu_si128((void *) pkt_data, v_vlan_hdr); +} + +static void +action_avx512_push_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch, + const struct nlattr *a, + bool should_steal OVS_UNUSED) +{ + struct dp_packet *packet; + const struct ovs_action_push_vlan *vlan = nl_attr_get(a); + + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { + avx512_eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci); + } +} + /* Probe functions to check ISA requirements. */ static int32_t avx512_isa_probe(uint32_t needs_vbmi) @@ -136,6 +185,7 @@ action_avx512_init(struct odp_execute_action_impl *self) { avx512_isa_probe(0); self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_avx512_pop_vlan; + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_avx512_push_vlan; return 0; } diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c index 175a80159..607f0fa94 100644 --- a/lib/odp-execute-private.c +++ b/lib/odp-execute-private.c @@ -218,6 +218,7 @@ int32_t action_autoval_init(struct odp_execute_action_impl *self) { self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_autoval_generic; + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_autoval_generic; return 0; } diff --git a/lib/odp-execute.c b/lib/odp-execute.c index 1bc9fae09..40f71fa96 100644 --- a/lib/odp-execute.c +++ b/lib/odp-execute.c @@ -842,6 +842,19 @@ action_pop_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch, } } +static void +action_push_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch, + const struct nlattr *a OVS_UNUSED, + bool should_steal OVS_UNUSED) +{ + struct dp_packet *packet; + const struct ovs_action_push_vlan *vlan = nl_attr_get(a); + + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { + eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci); + } +} + /* Implementation of the scalar actions impl init function. Build up the * array of func ptrs here. */ @@ -849,6 +862,7 @@ int32_t odp_action_scalar_init(struct odp_execute_action_impl *self) { self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_pop_vlan; + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_push_vlan; return 0; } @@ -991,15 +1005,6 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal, break; } - case OVS_ACTION_ATTR_PUSH_VLAN: { - const struct ovs_action_push_vlan *vlan = nl_attr_get(a); - - DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { - eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci); - } - break; - } - case OVS_ACTION_ATTR_PUSH_MPLS: { const struct ovs_action_push_mpls *mpls = nl_attr_get(a); @@ -1133,6 +1138,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal, case OVS_ACTION_ATTR_OUTPUT: case OVS_ACTION_ATTR_LB_OUTPUT: case OVS_ACTION_ATTR_POP_VLAN: + case OVS_ACTION_ATTR_PUSH_VLAN: case OVS_ACTION_ATTR_TUNNEL_PUSH: case OVS_ACTION_ATTR_TUNNEL_POP: case OVS_ACTION_ATTR_USERSPACE:
This commit adds the AVX512 implementation of the push_vlan action. The implementation here is auto-validated by the miniflow extract autovalidator, hence its correctness can be easily tested and verified. Signed-off-by: Emma Finn <emma.finn@intel.com> --- lib/odp-execute-avx512.c | 62 +++++++++++++++++++++++++++++++++++---- lib/odp-execute-private.c | 1 + lib/odp-execute.c | 24 +++++++++------ 3 files changed, 72 insertions(+), 15 deletions(-)