Message ID | 20220614115743.1143341-10-emma.finn@intel.com |
---|---|
State | Changes Requested |
Headers | show |
Series | [ovs-dev,v7,01/11] ofproto-dpif: Fix incorrect checksums in input packets | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot-_Build_and_Test | success | github build: passed |
ovsrobot/intel-ovs-compilation | fail | test: fail |
On 14 Jun 2022, at 13:57, Emma Finn wrote: > This commit adds the AVX512 implementation of the > push_vlan action. > > Signed-off-by: Emma Finn <emma.finn@intel.com> > --- > lib/odp-execute-avx512.c | 37 +++++++++++++++++++++++++++++++++++++ > lib/odp-execute-private.c | 1 + > lib/odp-execute.c | 22 +++++++++++++--------- > 3 files changed, 51 insertions(+), 9 deletions(-) > > diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c > index f9e2b1727..bb178cbac 100644 > --- a/lib/odp-execute-avx512.c > +++ b/lib/odp-execute-avx512.c > @@ -109,6 +109,41 @@ action_avx512_pop_vlan(struct dp_packet_batch *batch, > } > } > > +/* This function will load the entire eth_header into a 128-bit wide register. > + * Then use an 8-byte shuffle to shift the data left to make room for > + * the vlan header. Insert the new vlan header and then store back to the > + * original packet. */ > +static void > +action_avx512_push_vlan(struct dp_packet_batch *batch, const struct nlattr *a) > +{ > + struct dp_packet *packet; > + const struct ovs_action_push_vlan *vlan = nl_attr_get(a); > + ovs_be16 tpid, tci; > + > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > + tpid = vlan->vlan_tpid; > + tci = vlan->vlan_tci; > + > + avx512_dp_packet_resize_l2(packet, VLAN_HEADER_LEN); > + > + /* Build up the VLAN TCI/TPID, and merge with the moving of Ether. */ > + char *pkt_data = (char *) dp_packet_data(packet); > + const uint16_t tci_proc = tci & htons(~VLAN_CFI); > + const uint32_t tpid_tci = (tci_proc << 16) | tpid; > + > + static const uint8_t vlan_push_shuffle_mask[16] = { > + 4, 5, 6, 7, 8, 9, 10, 11, > + 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF > + }; > + > + __m128i v_ether = _mm_loadu_si128((void *) pkt_data); > + __m128i v_index = _mm_loadu_si128((void *) vlan_push_shuffle_mask); > + __m128i v_shift = _mm_shuffle_epi8(v_ether, v_index); > + __m128i v_vlan_hdr = _mm_insert_epi32(v_shift, tpid_tci, 3); > + _mm_storeu_si128((void *) pkt_data, v_vlan_hdr); > + } > +} > + > /* Probe functions to check ISA requirements. */ > static bool > avx512_isa_probe(void) > @@ -140,6 +175,8 @@ action_avx512_init(struct odp_execute_action_impl *self) > /* Set function pointers for actions that can be applied directly, these > * are identified by OVS_ACTION_ATTR_*. */ > self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_avx512_pop_vlan; > + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_avx512_push_vlan; > + > return 0; > } > > diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c > index de2e4dfc4..751a68fe3 100644 > --- a/lib/odp-execute-private.c > +++ b/lib/odp-execute-private.c > @@ -209,6 +209,7 @@ action_autoval_init(struct odp_execute_action_impl *self) > /* Set function pointers for actions that can be applied directly, these > * are identified by OVS_ACTION_ATTR_*. */ > self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_autoval_generic; > + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_autoval_generic; Based on my previous comments I think this should no longer be needed. > > return 0; > } > diff --git a/lib/odp-execute.c b/lib/odp-execute.c > index a49b331ef..59f6bdc64 100644 > --- a/lib/odp-execute.c > +++ b/lib/odp-execute.c > @@ -845,6 +845,17 @@ action_pop_vlan(struct dp_packet_batch *batch, > } > } > > +static void > +action_push_vlan(struct dp_packet_batch *batch, const struct nlattr *a) > +{ > + struct dp_packet *packet; > + const struct ovs_action_push_vlan *vlan = nl_attr_get(a); > + > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > + eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci); > + } > +} > + > /* Implementation of the scalar actions impl init function. Build up the > * array of func ptrs here. > */ > @@ -854,6 +865,7 @@ odp_action_scalar_init(struct odp_execute_action_impl *self) > /* Set function pointers for actions that can be applied directly, these > * are identified by OVS_ACTION_ATTR_*. */ > self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_pop_vlan; > + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_push_vlan; > > return 0; > } > @@ -995,15 +1007,6 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal, > break; > } > > - case OVS_ACTION_ATTR_PUSH_VLAN: { > - const struct ovs_action_push_vlan *vlan = nl_attr_get(a); > - > - DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > - eth_push_vlanog og eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci); > - } > - break; > - } > - > case OVS_ACTION_ATTR_PUSH_MPLS: { > const struct ovs_action_push_mpls *mpls = nl_attr_get(a); > > @@ -1156,6 +1159,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal, > case __OVS_ACTION_ATTR_MAX: > /* The following actions are handled by the scalar implementation. */ > case OVS_ACTION_ATTR_POP_VLAN: > + case OVS_ACTION_ATTR_PUSH_VLAN: > OVS_NOT_REACHED(); > } > > -- > 2.32.0 Other than the one remark above I have nothing to add other than some more explicit comments on the AVX section. See diff below: diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c index 7b0e00ffc..54313cd1f 100644 --- a/lib/odp-execute-avx512.c +++ b/lib/odp-execute-avx512.c @@ -154,10 +154,8 @@ action_avx512_pop_vlan(struct dp_packet_batch *batch, } } -/* This function will load the entire eth_header into a 128-bit wide register. - * Then use an 8-byte shuffle to shift the data left to make room for - * the vlan header. Insert the new vlan header and then store back to the - * original packet. */ +/* This function performance the same operation on each packet in the batch as + * the scalar eth_push_vlan() function. */ static void action_avx512_push_vlan(struct dp_packet_batch *batch, const struct nlattr *a) { @@ -169,22 +167,42 @@ action_avx512_push_vlan(struct dp_packet_batch *batch, const struct nlattr *a) tpid = vlan->vlan_tpid; tci = vlan->vlan_tci; + /* As we are about to insert the VLAN_HEADER we now need to adjust all + * the offsets. */ avx512_dp_packet_resize_l2(packet, VLAN_HEADER_LEN); - /* Build up the VLAN TCI/TPID, and merge with the moving of Ether. */ char *pkt_data = (char *) dp_packet_data(packet); + + /* Build up the VLAN TCI/TPID in a single uint32_t. */ const uint16_t tci_proc = tci & htons(~VLAN_CFI); const uint32_t tpid_tci = (tci_proc << 16) | tpid; + /* This shuffle mask is used below, and each position tells where to + * move the bytes to. So here, the fourth byte in v_ether is moved to + * byte location 0 in v_shift. The fifth is moved to 1, etc., etc. + * The 0xFF is special it tells to fill that position with 0. + */ static const uint8_t vlan_push_shuffle_mask[16] = { 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF }; + /* Load the first 128-bits of the packet into the v_ether register. + * Note that this includes the 4 unused bytes (VLAN_HEADER_LEN). */ __m128i v_ether = _mm_loadu_si128((void *) pkt_data); + + /* Load the shuffle mask in v_index. */ __m128i v_index = _mm_loadu_si128((void *) vlan_push_shuffle_mask); + + /* Move(shuffle) the veth_dst and veth_src data to create room for + * the vlan header. */ __m128i v_shift = _mm_shuffle_epi8(v_ether, v_index); + + /* Copy(insert) the 32-bit VLAN header, tpid_tci, at the 3rd 32-bit + * word offset, i.e., ofssetof(vlan_eth_header, veth_type) */ __m128i v_vlan_hdr = _mm_insert_epi32(v_shift, tpid_tci, 3); + + /* Write back the modified ethernet header. */ _mm_storeu_si128((void *) pkt_data, v_vlan_hdr); } }
diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c index f9e2b1727..bb178cbac 100644 --- a/lib/odp-execute-avx512.c +++ b/lib/odp-execute-avx512.c @@ -109,6 +109,41 @@ action_avx512_pop_vlan(struct dp_packet_batch *batch, } } +/* This function will load the entire eth_header into a 128-bit wide register. + * Then use an 8-byte shuffle to shift the data left to make room for + * the vlan header. Insert the new vlan header and then store back to the + * original packet. */ +static void +action_avx512_push_vlan(struct dp_packet_batch *batch, const struct nlattr *a) +{ + struct dp_packet *packet; + const struct ovs_action_push_vlan *vlan = nl_attr_get(a); + ovs_be16 tpid, tci; + + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { + tpid = vlan->vlan_tpid; + tci = vlan->vlan_tci; + + avx512_dp_packet_resize_l2(packet, VLAN_HEADER_LEN); + + /* Build up the VLAN TCI/TPID, and merge with the moving of Ether. */ + char *pkt_data = (char *) dp_packet_data(packet); + const uint16_t tci_proc = tci & htons(~VLAN_CFI); + const uint32_t tpid_tci = (tci_proc << 16) | tpid; + + static const uint8_t vlan_push_shuffle_mask[16] = { + 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF + }; + + __m128i v_ether = _mm_loadu_si128((void *) pkt_data); + __m128i v_index = _mm_loadu_si128((void *) vlan_push_shuffle_mask); + __m128i v_shift = _mm_shuffle_epi8(v_ether, v_index); + __m128i v_vlan_hdr = _mm_insert_epi32(v_shift, tpid_tci, 3); + _mm_storeu_si128((void *) pkt_data, v_vlan_hdr); + } +} + /* Probe functions to check ISA requirements. */ static bool avx512_isa_probe(void) @@ -140,6 +175,8 @@ action_avx512_init(struct odp_execute_action_impl *self) /* Set function pointers for actions that can be applied directly, these * are identified by OVS_ACTION_ATTR_*. */ self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_avx512_pop_vlan; + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_avx512_push_vlan; + return 0; } diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c index de2e4dfc4..751a68fe3 100644 --- a/lib/odp-execute-private.c +++ b/lib/odp-execute-private.c @@ -209,6 +209,7 @@ action_autoval_init(struct odp_execute_action_impl *self) /* Set function pointers for actions that can be applied directly, these * are identified by OVS_ACTION_ATTR_*. */ self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_autoval_generic; + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_autoval_generic; return 0; } diff --git a/lib/odp-execute.c b/lib/odp-execute.c index a49b331ef..59f6bdc64 100644 --- a/lib/odp-execute.c +++ b/lib/odp-execute.c @@ -845,6 +845,17 @@ action_pop_vlan(struct dp_packet_batch *batch, } } +static void +action_push_vlan(struct dp_packet_batch *batch, const struct nlattr *a) +{ + struct dp_packet *packet; + const struct ovs_action_push_vlan *vlan = nl_attr_get(a); + + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { + eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci); + } +} + /* Implementation of the scalar actions impl init function. Build up the * array of func ptrs here. */ @@ -854,6 +865,7 @@ odp_action_scalar_init(struct odp_execute_action_impl *self) /* Set function pointers for actions that can be applied directly, these * are identified by OVS_ACTION_ATTR_*. */ self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_pop_vlan; + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_push_vlan; return 0; } @@ -995,15 +1007,6 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal, break; } - case OVS_ACTION_ATTR_PUSH_VLAN: { - const struct ovs_action_push_vlan *vlan = nl_attr_get(a); - - DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { - eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci); - } - break; - } - case OVS_ACTION_ATTR_PUSH_MPLS: { const struct ovs_action_push_mpls *mpls = nl_attr_get(a); @@ -1156,6 +1159,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal, case __OVS_ACTION_ATTR_MAX: /* The following actions are handled by the scalar implementation. */ case OVS_ACTION_ATTR_POP_VLAN: + case OVS_ACTION_ATTR_PUSH_VLAN: OVS_NOT_REACHED(); }
This commit adds the AVX512 implementation of the push_vlan action. Signed-off-by: Emma Finn <emma.finn@intel.com> --- lib/odp-execute-avx512.c | 37 +++++++++++++++++++++++++++++++++++++ lib/odp-execute-private.c | 1 + lib/odp-execute.c | 22 +++++++++++++--------- 3 files changed, 51 insertions(+), 9 deletions(-)