Message ID | 20220510142202.1087967-10-emma.finn@intel.com |
---|---|
State | Changes Requested |
Headers | show |
Series | Actions Infrastructure + Optimizations | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot-_Build_and_Test | success | github build: passed |
ovsrobot/intel-ovs-compilation | success | test: success |
On 10 May 2022, at 16:22, Emma Finn wrote: > This commit adds the AVX512 implementation of the > push_vlan action. > > Signed-off-by: Emma Finn <emma.finn@intel.com> > --- > lib/odp-execute-avx512.c | 56 ++++++++++++++++++++++++++++++++++++--- > lib/odp-execute-private.c | 1 + > lib/odp-execute.c | 24 ++++++++++------- > 3 files changed, 69 insertions(+), 12 deletions(-) > > diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c > index 637956236..5d095f867 100644 > --- a/lib/odp-execute-avx512.c > +++ b/lib/odp-execute-avx512.c > @@ -42,6 +42,13 @@ static inline void ALWAYS_INLINE > avx512_dp_packet_resize_l2(struct dp_packet *b, int resize_by_bytes) > { > /* Update packet size/data pointers */ > + if (resize_by_bytes >= 0) { > + dp_packet_prealloc_headroom(b, resize_by_bytes); > + } else { > + ovs_assert(dp_packet_size(b) - dp_packet_l2_pad_size(b) >= > + -resize_by_bytes); > + } > + > dp_packet_set_data(b, (char *) dp_packet_data(b) - resize_by_bytes); > dp_packet_set_size(b, dp_packet_size(b) + resize_by_bytes); See previous patch, please make this first part equal to dp_packet_resize_l2_5() > @@ -51,7 +58,7 @@ avx512_dp_packet_resize_l2(struct dp_packet *b, int resize_by_bytes) > > /* Only these lanes can be incremented/decremented for L2. */ > const uint8_t k_lanes = 0b1110; > - __m128i v_offset = _mm_set1_epi16(VLAN_HEADER_LEN); > + __m128i v_offset = _mm_set1_epi16(abs(resize_by_bytes)); Guess this should be part of the previous patch (all changes in this function). > > /* Load packet and compare with UINT16_MAX */ > void *adjust_ptr = &b->l2_pad_size; > @@ -63,8 +70,16 @@ avx512_dp_packet_resize_l2(struct dp_packet *b, int resize_by_bytes) > v_u16_max); > > /* Update VLAN_HEADER_LEN using compare mask, store results. */ > - __m128i v_adjust_wip = _mm_mask_sub_epi16(v_adjust_src, k_cmp, > - v_adjust_src, v_offset); > + __m128i v_adjust_wip; > + > + if (resize_by_bytes >= 0) { > + v_adjust_wip = _mm_mask_add_epi16(v_adjust_src, k_cmp, > + v_adjust_src, v_offset); Alignment > + } else { > + v_adjust_wip = _mm_mask_sub_epi16(v_adjust_src, k_cmp, > + v_adjust_src, v_offset); Alignment > + } > + > _mm_storeu_si128(adjust_ptr, v_adjust_wip); > } > > @@ -90,6 +105,40 @@ action_avx512_pop_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch, > } > } > > +static void > +action_avx512_push_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch, > + const struct nlattr *a, > + bool should_steal OVS_UNUSED) Alignment > +{ > + struct dp_packet *packet; > + const struct ovs_action_push_vlan *vlan = nl_attr_get(a); > + ovs_be16 tpid, tci; > + > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > + tpid = vlan->vlan_tpid; > + tci = vlan->vlan_tci; > + > + avx512_dp_packet_resize_l2(packet, VLAN_HEADER_LEN); > + > + /* Build up the VLAN TCI/TPID, and merge with the moving of Ether. */ > + char *pkt_data = (char *) dp_packet_data(packet); > + const uint16_t tci_proc = tci & htons(~VLAN_CFI); > + const uint32_t tpid_tci = (tci_proc << 16) | tpid; > + > + static const uint8_t vlan_push_shuffle_mask[16] = { > + 4, 5, 6, 7, 8, 9, 10, 11, > + 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF > + }; > + Please add comments so that people can understand what is happening without having to read Intel's Intrinsics Guide. > + __m128i v_ether = _mm_loadu_si128((void *) pkt_data); > + __m128i v_index = _mm_loadu_si128((void *) vlan_push_shuffle_mask); > + __m128i v_shift = _mm_shuffle_epi8(v_ether, v_index); > + __m128i v_vlan_hdr = _mm_insert_epi32(v_shift, tpid_tci, 3); > + _mm_storeu_si128((void *) pkt_data, v_vlan_hdr); > + Remove extra line > + } > +} > + > /* Probe functions to check ISA requirements. */ > static int32_t > avx512_isa_probe(uint32_t needs_vbmi) > @@ -130,6 +179,7 @@ action_avx512_init(struct odp_execute_action_impl *self) > { > avx512_isa_probe(0); > self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_avx512_pop_vlan; > + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_avx512_push_vlan; > > return 0; > } > diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c > index 8257bba80..6b09ad353 100644 > --- a/lib/odp-execute-private.c > +++ b/lib/odp-execute-private.c > @@ -228,6 +228,7 @@ int32_t > action_autoval_init(struct odp_execute_action_impl *self) > { > self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_autoval_generic; > + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_autoval_generic; > > return 0; > } > diff --git a/lib/odp-execute.c b/lib/odp-execute.c > index ba532101f..690e7e1ce 100644 > --- a/lib/odp-execute.c > +++ b/lib/odp-execute.c > @@ -845,6 +845,19 @@ action_pop_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch, > } > } > > +static void > +action_push_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch, > + const struct nlattr *a OVS_UNUSED, > + bool should_steal OVS_UNUSED) > +{ > + struct dp_packet *packet; > + const struct ovs_action_push_vlan *vlan = nl_attr_get(a); > + > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > + eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci); > + } > +} > + > /* Implementation of the scalar actions impl init function. Build up the > * array of func ptrs here. > */ > @@ -852,6 +865,7 @@ int32_t > odp_action_scalar_init(struct odp_execute_action_impl *self) > { > self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_pop_vlan; > + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_push_vlan; > > return 0; > } > @@ -994,15 +1008,6 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal, > break; > } > > - case OVS_ACTION_ATTR_PUSH_VLAN: { > - const struct ovs_action_push_vlan *vlan = nl_attr_get(a); > - > - DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > - eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci); > - } > - break; > - } > - > case OVS_ACTION_ATTR_PUSH_MPLS: { > const struct ovs_action_push_mpls *mpls = nl_attr_get(a); > > @@ -1147,6 +1152,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal, > case OVS_ACTION_ATTR_OUTPUT: > case OVS_ACTION_ATTR_LB_OUTPUT: > case OVS_ACTION_ATTR_POP_VLAN: > + case OVS_ACTION_ATTR_PUSH_VLAN: Move this to the special, to be created section of scalar actions. > case OVS_ACTION_ATTR_TUNNEL_PUSH: > case OVS_ACTION_ATTR_TUNNEL_POP: > case OVS_ACTION_ATTR_USERSPACE: > -- > 2.25.1
diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c index 637956236..5d095f867 100644 --- a/lib/odp-execute-avx512.c +++ b/lib/odp-execute-avx512.c @@ -42,6 +42,13 @@ static inline void ALWAYS_INLINE avx512_dp_packet_resize_l2(struct dp_packet *b, int resize_by_bytes) { /* Update packet size/data pointers */ + if (resize_by_bytes >= 0) { + dp_packet_prealloc_headroom(b, resize_by_bytes); + } else { + ovs_assert(dp_packet_size(b) - dp_packet_l2_pad_size(b) >= + -resize_by_bytes); + } + dp_packet_set_data(b, (char *) dp_packet_data(b) - resize_by_bytes); dp_packet_set_size(b, dp_packet_size(b) + resize_by_bytes); @@ -51,7 +58,7 @@ avx512_dp_packet_resize_l2(struct dp_packet *b, int resize_by_bytes) /* Only these lanes can be incremented/decremented for L2. */ const uint8_t k_lanes = 0b1110; - __m128i v_offset = _mm_set1_epi16(VLAN_HEADER_LEN); + __m128i v_offset = _mm_set1_epi16(abs(resize_by_bytes)); /* Load packet and compare with UINT16_MAX */ void *adjust_ptr = &b->l2_pad_size; @@ -63,8 +70,16 @@ avx512_dp_packet_resize_l2(struct dp_packet *b, int resize_by_bytes) v_u16_max); /* Update VLAN_HEADER_LEN using compare mask, store results. */ - __m128i v_adjust_wip = _mm_mask_sub_epi16(v_adjust_src, k_cmp, - v_adjust_src, v_offset); + __m128i v_adjust_wip; + + if (resize_by_bytes >= 0) { + v_adjust_wip = _mm_mask_add_epi16(v_adjust_src, k_cmp, + v_adjust_src, v_offset); + } else { + v_adjust_wip = _mm_mask_sub_epi16(v_adjust_src, k_cmp, + v_adjust_src, v_offset); + } + _mm_storeu_si128(adjust_ptr, v_adjust_wip); } @@ -90,6 +105,40 @@ action_avx512_pop_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch, } } +static void +action_avx512_push_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch, + const struct nlattr *a, + bool should_steal OVS_UNUSED) +{ + struct dp_packet *packet; + const struct ovs_action_push_vlan *vlan = nl_attr_get(a); + ovs_be16 tpid, tci; + + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { + tpid = vlan->vlan_tpid; + tci = vlan->vlan_tci; + + avx512_dp_packet_resize_l2(packet, VLAN_HEADER_LEN); + + /* Build up the VLAN TCI/TPID, and merge with the moving of Ether. */ + char *pkt_data = (char *) dp_packet_data(packet); + const uint16_t tci_proc = tci & htons(~VLAN_CFI); + const uint32_t tpid_tci = (tci_proc << 16) | tpid; + + static const uint8_t vlan_push_shuffle_mask[16] = { + 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF + }; + + __m128i v_ether = _mm_loadu_si128((void *) pkt_data); + __m128i v_index = _mm_loadu_si128((void *) vlan_push_shuffle_mask); + __m128i v_shift = _mm_shuffle_epi8(v_ether, v_index); + __m128i v_vlan_hdr = _mm_insert_epi32(v_shift, tpid_tci, 3); + _mm_storeu_si128((void *) pkt_data, v_vlan_hdr); + + } +} + /* Probe functions to check ISA requirements. */ static int32_t avx512_isa_probe(uint32_t needs_vbmi) @@ -130,6 +179,7 @@ action_avx512_init(struct odp_execute_action_impl *self) { avx512_isa_probe(0); self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_avx512_pop_vlan; + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_avx512_push_vlan; return 0; } diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c index 8257bba80..6b09ad353 100644 --- a/lib/odp-execute-private.c +++ b/lib/odp-execute-private.c @@ -228,6 +228,7 @@ int32_t action_autoval_init(struct odp_execute_action_impl *self) { self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_autoval_generic; + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_autoval_generic; return 0; } diff --git a/lib/odp-execute.c b/lib/odp-execute.c index ba532101f..690e7e1ce 100644 --- a/lib/odp-execute.c +++ b/lib/odp-execute.c @@ -845,6 +845,19 @@ action_pop_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch, } } +static void +action_push_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch, + const struct nlattr *a OVS_UNUSED, + bool should_steal OVS_UNUSED) +{ + struct dp_packet *packet; + const struct ovs_action_push_vlan *vlan = nl_attr_get(a); + + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { + eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci); + } +} + /* Implementation of the scalar actions impl init function. Build up the * array of func ptrs here. */ @@ -852,6 +865,7 @@ int32_t odp_action_scalar_init(struct odp_execute_action_impl *self) { self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_pop_vlan; + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_push_vlan; return 0; } @@ -994,15 +1008,6 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal, break; } - case OVS_ACTION_ATTR_PUSH_VLAN: { - const struct ovs_action_push_vlan *vlan = nl_attr_get(a); - - DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { - eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci); - } - break; - } - case OVS_ACTION_ATTR_PUSH_MPLS: { const struct ovs_action_push_mpls *mpls = nl_attr_get(a); @@ -1147,6 +1152,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal, case OVS_ACTION_ATTR_OUTPUT: case OVS_ACTION_ATTR_LB_OUTPUT: case OVS_ACTION_ATTR_POP_VLAN: + case OVS_ACTION_ATTR_PUSH_VLAN: case OVS_ACTION_ATTR_TUNNEL_PUSH: case OVS_ACTION_ATTR_TUNNEL_POP: case OVS_ACTION_ATTR_USERSPACE:
This commit adds the AVX512 implementation of the push_vlan action. Signed-off-by: Emma Finn <emma.finn@intel.com> --- lib/odp-execute-avx512.c | 56 ++++++++++++++++++++++++++++++++++++--- lib/odp-execute-private.c | 1 + lib/odp-execute.c | 24 ++++++++++------- 3 files changed, 69 insertions(+), 12 deletions(-)