Message ID | 20230712052919.1893558-2-mkp@redhat.com |
---|---|
State | Changes Requested, archived |
Headers | show |
Series | [ovs-dev,v4,1/2] userspace: Respect tso/gso segment size. | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot-_Build_and_Test | success | github build: passed |
ovsrobot/intel-ovs-compilation | success | test: success |
On 7/12/23 07:29, Mike Pattrick wrote: > From: Flavio Leitner <fbl@sysclose.org> > > This provides a software implementation in the case > the egress netdev doesn't support segmentation in hardware. > > The challenge here is to guarantee packet ordering in the > original batch that may be full of TSO packets. Each TSO > packet can go up to ~64kB, so with segment size of 1440 > that means about 44 packets for each TSO. Each batch has > 32 packets, so the total batch amounts to 1408 normal > packets. > > The segmentation estimates the total number of packets > and then the total number of batches. Then allocate > enough memory and finally do the work. > > Finally each batch is sent in order to the netdev. > > Signed-off-by: Flavio Leitner <fbl@sysclose.org> > Co-authored-by: Mike Pattrick <mkp@redhat.com> > Signed-off-by: Mike Pattrick <mkp@redhat.com> > --- > v4: > - Various formatting changes > - Fixed memory leak in soft-gso code if packet is flagged > for GSO but incorrectly lacks segment size. > --- > lib/automake.mk | 2 + > lib/dp-packet-gso.c | 173 ++++++++++++++++++++++++++++++++++++++++++++ > lib/dp-packet-gso.h | 23 ++++++ > lib/dp-packet.h | 7 ++ > lib/netdev-dpdk.c | 44 ++++++++--- > lib/netdev-linux.c | 58 --------------- > lib/netdev.c | 134 +++++++++++++++++++++------------- > lib/packets.c | 4 +- > 8 files changed, 322 insertions(+), 123 deletions(-) > create mode 100644 lib/dp-packet-gso.c > create mode 100644 lib/dp-packet-gso.h > > diff --git a/lib/automake.mk b/lib/automake.mk > index e64ee76ce..49a92958d 100644 > --- a/lib/automake.mk > +++ b/lib/automake.mk > @@ -118,6 +118,8 @@ lib_libopenvswitch_la_SOURCES = \ > lib/dpctl.h \ > lib/dp-packet.h \ > lib/dp-packet.c \ > + lib/dp-packet-gso.c \ > + lib/dp-packet-gso.h \ > lib/dpdk.h \ > lib/dpif-netdev-extract-study.c \ > lib/dpif-netdev-lookup.h \ > diff --git a/lib/dp-packet-gso.c b/lib/dp-packet-gso.c > new file mode 100644 > index 000000000..6a007fdad > --- /dev/null > +++ b/lib/dp-packet-gso.c > @@ -0,0 +1,173 @@ > +/* > + * Copyright (c) 2023 Red Hat, Inc. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +#include <config.h> > +#include <stdlib.h> > +#include <string.h> > + > +#include "coverage.h" > +#include "dp-packet.h" > +#include "dp-packet-gso.h" > +#include "netdev-provider.h" > +#include "openvswitch/vlog.h" > + > +VLOG_DEFINE_THIS_MODULE(dp_packet_gso); > + > +COVERAGE_DEFINE(soft_seg_good); > +COVERAGE_DEFINE(soft_seg_drop); > + > +/* Retuns a new packet that is a segment of packet 'p'. > + * > + * The new packet is initialized with 'hdr_len' bytes from the > + * start of packet 'p' and then appended with 'data_len' bytes > + * from the 'data' buffer. > + * > + * Note: The packet headers are not updated. */ > +static struct dp_packet * > +dp_packet_gso_seg_new(const struct dp_packet *p, size_t hdr_len, > + const char *data, size_t data_len) > +{ > + struct dp_packet *seg = dp_packet_new_with_headroom(hdr_len + data_len, > + dp_packet_headroom(p)); > + > + /* Append the original packet headers and then the payload. */ > + dp_packet_put(seg, dp_packet_data(p), hdr_len); > + dp_packet_put(seg, data, data_len); > + > + /* The new segment should have the same offsets. */ > + seg->l2_5_ofs = p->l2_5_ofs; > + seg->l3_ofs = p->l3_ofs; > + seg->l4_ofs = p->l4_ofs; > + > + /* The protocol headers remain the same, so preserve hash and mark. */ > + *dp_packet_rss_ptr(seg) = dp_packet_get_rss_hash(p); This seems a bit inconsistent. We should either just use dp_packet_rss_ptr on both sides, or use get+set functions. > + *dp_packet_flow_mark_ptr(seg) = *dp_packet_flow_mark_ptr(p); > + > + /* The segment should inherit all the offloading flags from the > + * original packet, except for the TCP segmentation, external > + * buffer and indirect buffer flags. */ > + *dp_packet_ol_flags_ptr(seg) = *dp_packet_ol_flags_ptr(p) > + & DP_PACKET_OL_SUPPORTED_MASK; > + > + dp_packet_hwol_reset_tcp_seg(seg); > + > + return seg; > +} > + > +/* Returns the calculated number of TCP segments in packet 'p'. */ > +int > +dp_packet_gso_nr_segs(struct dp_packet *p) > +{ > + uint16_t segsz = dp_packet_get_tso_segsz(p); > + const char *data_tail; > + const char *data_pos; > + > + data_pos = dp_packet_get_tcp_payload(p); > + data_tail = (char *) dp_packet_tail(p) - dp_packet_l2_pad_size(p); > + > + return DIV_ROUND_UP((data_tail - data_pos), segsz); > + You missed the extra line here. > +} > + > +/* Perform software segmentation on packet 'p'. > + * > + * Returns all the segments added to the array of preallocated > + * batches in 'batches' starting at batch position 'batch_pos'. */ The comment is outdated. > +bool > +dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches) > +{ > + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); > + struct dp_packet_batch *curr_batch = *batches; > + struct tcp_header *tcp_hdr; > + struct ip_header *ip_hdr; > + struct dp_packet *seg; > + uint16_t tcp_offset; > + uint16_t tso_segsz; > + uint32_t tcp_seq; > + uint16_t ip_id; > + int hdr_len; > + int seg_len; > + > + tso_segsz = dp_packet_get_tso_segsz(p); > + if (!tso_segsz) { > + VLOG_WARN_RL(&rl, "GSO packet with len %d with no segment size.", > + dp_packet_size(p)); Indentation is wrong here. > + COVERAGE_INC(soft_seg_drop); Might be better to move this counter to the caller, i.e. the place where actual drop is happening. > + return false; > + } > + > + tcp_hdr = dp_packet_l4(p); > + tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl); > + tcp_seq = ntohl(get_16aligned_be32(&tcp_hdr->tcp_seq)); > + hdr_len = ((char *) dp_packet_l4(p) - (char *) dp_packet_eth(p)) > + + tcp_offset * 4; > + ip_id = 0; > + if (dp_packet_hwol_is_ipv4(p)) { > + ip_hdr = dp_packet_l3(p); > + ip_id = ntohs(ip_hdr->ip_id); > + } > + > + const char *data_tail = (char *) dp_packet_tail(p) > + - dp_packet_l2_pad_size(p); > + const char *data_pos = dp_packet_get_tcp_payload(p); > + int n_segs = dp_packet_gso_nr_segs(p); > + > + for (int i = 0; i < n_segs; i++) { > + seg_len = data_tail - data_pos; > + if (seg_len > tso_segsz) { > + seg_len = tso_segsz; > + } > + > + seg = dp_packet_gso_seg_new(p, hdr_len, data_pos, seg_len); > + data_pos += seg_len; > + > + /* Update L3 header. */ > + if (dp_packet_hwol_is_ipv4(seg)) { > + ip_hdr = dp_packet_l3(seg); > + ip_hdr->ip_tot_len = htons(sizeof *ip_hdr + > + dp_packet_l4_size(seg)); > + ip_hdr->ip_id = htons(ip_id); > + ip_hdr->ip_csum = 0; > + ip_id++; > + } else { > + struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(seg); > + > + ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen > + = htons(sizeof *ip_hdr + dp_packet_l4_size(seg)); > + } > + > + /* Update L4 header. */ > + tcp_hdr = dp_packet_l4(seg); > + put_16aligned_be32(&tcp_hdr->tcp_seq, htonl(tcp_seq)); > + tcp_seq += seg_len; > + if (OVS_LIKELY(i < (n_segs - 1))) { > + /* Reset flags PUSH and FIN unless it is the last segment. */ > + uint16_t tcp_flags = TCP_FLAGS(tcp_hdr->tcp_ctl) > + & ~(TCP_PSH | TCP_FIN); > + tcp_hdr->tcp_ctl = TCP_CTL(tcp_flags, tcp_offset); > + } > + > + if (dp_packet_batch_is_full(curr_batch)) { > + curr_batch++; > + } > + > + dp_packet_batch_add(curr_batch, seg); > + } > + > + *batches = curr_batch; > + COVERAGE_INC(soft_seg_good); > + return true; > +} > diff --git a/lib/dp-packet-gso.h b/lib/dp-packet-gso.h > new file mode 100644 > index 000000000..9c282fb86 > --- /dev/null > +++ b/lib/dp-packet-gso.h > @@ -0,0 +1,23 @@ > +/* > + * Copyright (c) 2023 Red Hat, Inc. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +#ifndef DP_PACKET_GSO_H > +#define DP_PACKET_GSO_H 1 > + > +bool dp_packet_gso(struct dp_packet *, struct dp_packet_batch **); > +int dp_packet_gso_nr_segs(struct dp_packet *); > + > +#endif /* dp-packet-gso.h */ > diff --git a/lib/dp-packet.h b/lib/dp-packet.h > index 6a924f3ff..239d7a3d4 100644 > --- a/lib/dp-packet.h > +++ b/lib/dp-packet.h > @@ -1131,6 +1131,13 @@ dp_packet_hwol_set_tcp_seg(struct dp_packet *b) > *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TCP_SEG; > } > > +/* Resets TCP Segmentation flag in packet 'p'. */ > +static inline void > +dp_packet_hwol_reset_tcp_seg(struct dp_packet *p) > +{ > + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_TCP_SEG; > +} > + > /* Returns 'true' if the IP header has good integrity and the > * checksum in it is complete. */ > static inline bool > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 1c100c48e..8eede8a88 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -2288,6 +2288,7 @@ static bool > netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf) > { > struct dp_packet *pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf); > + struct tcp_header *th; > > if (!(mbuf->ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK > | RTE_MBUF_F_TX_TCP_SEG))) { > @@ -2299,27 +2300,36 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf) > mbuf->l4_len = 0; > mbuf->outer_l2_len = 0; > mbuf->outer_l3_len = 0; > + th = dp_packet_l4(pkt); > > if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) { > - struct tcp_header *th = dp_packet_l4(pkt); > - int hdr_len; > - > if (!th) { > VLOG_WARN_RL(&rl, "%s: TCP Segmentation without L4 header" > + " pkt len: %"PRIu32"", dev->up.name, mbuf->pkt_len); > + return false; 2 lines above have strange indentation. > + } > + } > + > + if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_CKSUM) { > + if (!th) { > + VLOG_WARN_RL(&rl, "%s: TCP offloading without L4 header" > " pkt len: %"PRIu32"", dev->up.name, mbuf->pkt_len); > return false; > } > > mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4; > - mbuf->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM; > - hdr_len = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len; > mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len; > - if (OVS_UNLIKELY((hdr_len + mbuf->tso_segsz) > dev->max_packet_len)) { > - VLOG_WARN_RL(&rl, "%s: Oversized TSO packet. " > - "hdr: %"PRIu32", gso: %"PRIu32", max len: %"PRIu32"", > - dev->up.name, hdr_len, mbuf->tso_segsz, > - dev->max_packet_len); > - return false; > + > + if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) { > + int hdr_len = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len; > + if (OVS_UNLIKELY((hdr_len + > + mbuf->tso_segsz) > dev->max_packet_len)) { > + VLOG_WARN_RL(&rl, "%s: Oversized TSO packet. hdr: %"PRIu32", " > + "gso: %"PRIu32", max len: %"PRIu32"", > + dev->up.name, hdr_len, mbuf->tso_segsz, > + dev->max_packet_len); > + return false; > + } > } > > if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV4) { > @@ -2707,6 +2717,7 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet *pkt_orig) > mbuf_dest->packet_type = pkt_orig->mbuf.packet_type; > mbuf_dest->ol_flags |= (pkt_orig->mbuf.ol_flags & > ~(RTE_MBUF_F_EXTERNAL | RTE_MBUF_F_INDIRECT)); > + mbuf_dest->tso_segsz = pkt_orig->mbuf.tso_segsz; > > memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size, > sizeof(struct dp_packet) - offsetof(struct dp_packet, l2_pad_size)); > @@ -2765,11 +2776,20 @@ netdev_dpdk_common_send(struct netdev *netdev, struct dp_packet_batch *batch, > struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets; > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > size_t cnt, pkt_cnt = dp_packet_batch_size(batch); > + struct dp_packet *packet; > + bool need_copy = false; > > memset(stats, 0, sizeof *stats); > > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > + if (packet->source != DPBUF_DPDK) { > + need_copy = true; > + break; > + } > + } > + > /* Copy dp-packets to mbufs. */ > - if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) { > + if (OVS_UNLIKELY(need_copy)) { > cnt = dpdk_copy_batch_to_mbuf(netdev, batch); > stats->tx_failure_drops += pkt_cnt - cnt; > pkt_cnt = cnt; > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c > index 52ce4947d..7a92ee534 100644 > --- a/lib/netdev-linux.c > +++ b/lib/netdev-linux.c > @@ -6849,55 +6849,6 @@ af_packet_sock(void) > return sock; > } > > -static int > -netdev_linux_parse_l2(struct dp_packet *b, uint16_t *l4proto) > -{ > - struct eth_header *eth_hdr; > - ovs_be16 eth_type; > - int l2_len; > - > - eth_hdr = dp_packet_at(b, 0, ETH_HEADER_LEN); > - if (!eth_hdr) { > - return -EINVAL; > - } > - > - l2_len = ETH_HEADER_LEN; > - eth_type = eth_hdr->eth_type; > - if (eth_type_vlan(eth_type)) { > - struct vlan_header *vlan = dp_packet_at(b, l2_len, VLAN_HEADER_LEN); > - > - if (!vlan) { > - return -EINVAL; > - } > - > - eth_type = vlan->vlan_next_type; > - l2_len += VLAN_HEADER_LEN; > - } > - > - if (eth_type == htons(ETH_TYPE_IP)) { > - struct ip_header *ip_hdr = dp_packet_at(b, l2_len, IP_HEADER_LEN); > - > - if (!ip_hdr) { > - return -EINVAL; > - } > - > - *l4proto = ip_hdr->ip_proto; > - dp_packet_hwol_set_tx_ipv4(b); > - } else if (eth_type == htons(ETH_TYPE_IPV6)) { > - struct ovs_16aligned_ip6_hdr *nh6; > - > - nh6 = dp_packet_at(b, l2_len, IPV6_HEADER_LEN); > - if (!nh6) { > - return -EINVAL; > - } > - > - *l4proto = nh6->ip6_ctlun.ip6_un1.ip6_un1_nxt; > - dp_packet_hwol_set_tx_ipv6(b); > - } > - > - return 0; > -} > - > /* Initializes packet 'b' with features enabled in the prepended > * struct virtio_net_hdr. Returns 0 if successful, otherwise a > * positive errno value. */ > @@ -6915,15 +6866,6 @@ netdev_linux_parse_vnet_hdr(struct dp_packet *b) > } > > if (vnet->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) { > - uint16_t l4proto = 0; > - > - if (netdev_linux_parse_l2(b, &l4proto)) { > - return EINVAL; > - } > - > - if (l4proto == IPPROTO_UDP) { > - dp_packet_hwol_set_csum_udp(b); > - } I had a question on v3 regarding why this part of a code is getting removed. Have you missed it? Is it because flags are going to be set later in the miniflow_extract() ? The problem might be that in case of classification offload or simple match lookup the miniflow_extract() will not be called. These code paths use parse_tcp_flags() function to get the necessary information. Hence the flags will not be set and the packet will not be correctly segmented or checksummed. Maybe removal of this part should be a separate commit along with fixing the hw-oofload/simple match use case? > /* The packet has offloaded checksum. However, there is no > * additional information like the protocol used, so it would > * require to parse the packet here. The checksum starting point > diff --git a/lib/netdev.c b/lib/netdev.c > index 8df7f8737..00c8004d9 100644 > --- a/lib/netdev.c > +++ b/lib/netdev.c > @@ -35,6 +35,7 @@ > #include "coverage.h" > #include "dpif.h" > #include "dp-packet.h" > +#include "dp-packet-gso.h" > #include "openvswitch/dynamic-string.h" > #include "fatal-signal.h" > #include "hash.h" > @@ -56,6 +57,7 @@ > #include "svec.h" > #include "openvswitch/vlog.h" > #include "flow.h" > +#include "userspace-tso.h" > #include "util.h" > #ifdef __linux__ > #include "tc.h" > @@ -67,7 +69,6 @@ COVERAGE_DEFINE(netdev_received); > COVERAGE_DEFINE(netdev_sent); > COVERAGE_DEFINE(netdev_add_router); > COVERAGE_DEFINE(netdev_get_stats); > -COVERAGE_DEFINE(netdev_send_prepare_drops); > COVERAGE_DEFINE(netdev_push_header_drops); > > struct netdev_saved_flags { > @@ -792,60 +793,81 @@ netdev_get_pt_mode(const struct netdev *netdev) > : NETDEV_PT_LEGACY_L2); > } > > -/* Check if a 'packet' is compatible with 'netdev_flags'. > - * If a packet is incompatible, return 'false' with the 'errormsg' > - * pointing to a reason. */ > -static bool > -netdev_send_prepare_packet(const uint64_t netdev_flags, > - struct dp_packet *packet, char **errormsg) > -{ > - if (dp_packet_hwol_is_tso(packet) > - && !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) { > - /* Fall back to GSO in software. */ > - VLOG_ERR_BUF(errormsg, "No TSO support"); > - return false; > - } > - > - /* Packet with IP csum offloading enabled was received with verified csum. > - * Leave the IP csum offloading enabled even with good checksum to the > - * netdev to decide what would be the best to do. > - * Provide a software fallback in case the device doesn't support IP csum > - * offloading. Note: Encapsulated packet must have the inner IP header > - * csum already calculated. > - * Packet with L4 csum offloading enabled was received with verified csum. > - * Leave the L4 csum offloading enabled even with good checksum for the > - * netdev to decide what would be the best to do. > - * Netdev that requires pseudo header csum needs to calculate that. > - * Provide a software fallback in case the netdev doesn't support L4 csum > - * offloading. Note: Encapsulated packet must have the inner L4 header > - * csum already calculated. */ > - dp_packet_ol_send_prepare(packet, netdev_flags); > - > - return true; > -} > - > -/* Check if each packet in 'batch' is compatible with 'netdev' features, > - * otherwise either fall back to software implementation or drop it. */ > -static void > -netdev_send_prepare_batch(const struct netdev *netdev, > - struct dp_packet_batch *batch) > +/* Attempts to segment GSO flagged packets and send them as multiple bundles. > + * This function is only used if at least one packet in the current batch is > + * flagged for TSO and the netdev does not support this. > + * > + * The return value is 0 if all batches sent successfully, and an error code > + * from netdev_class->send() if at least one batch failed to send. */ > +static int > +netdev_send_tso(struct netdev *netdev, int qid, > + struct dp_packet_batch *batch, bool concurrent_txq) > { > + struct dp_packet_batch *batches; > struct dp_packet *packet; > - size_t i, size = dp_packet_batch_size(batch); > + int retval = 0; > + int n_packets; > + int n_batches; > + int error; > > - DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) { > - char *errormsg = NULL; > + /* Calculate the total number of packets in the batch after > + * the segmentation. */ > + n_packets = 0; > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > + if (dp_packet_hwol_is_tso(packet)) { > + n_packets += dp_packet_gso_nr_segs(packet); > + } else { > + n_packets++; > + } > + } > > - if (netdev_send_prepare_packet(netdev->ol_flags, packet, &errormsg)) { > - dp_packet_batch_refill(batch, packet, i); > + if (!n_packets) { > + return 0; > + } > + > + /* Allocate enough batches to store all the packets in order. */ > + n_batches = DIV_ROUND_UP(n_packets, NETDEV_MAX_BURST); > + batches = xmalloc(n_batches * sizeof *batches); > + > + struct dp_packet_batch *curr_batch = batches; > + struct dp_packet_batch *last_batch = &batches[n_batches - 1]; > + for (curr_batch = batches; curr_batch < last_batch; curr_batch++) { Should be <= ? > + dp_packet_batch_init(curr_batch); > + } > + > + /* Do the packet segmentation if TSO is flagged. */ > + size_t size = dp_packet_batch_size(batch); > + size_t k; > + curr_batch = batches; > + DP_PACKET_BATCH_REFILL_FOR_EACH (k, size, packet, batch) { > + if (dp_packet_hwol_is_tso(packet)) { > + if (!dp_packet_gso(packet, &curr_batch)) { > + dp_packet_delete(packet); > + } > } else { > - dp_packet_delete(packet); > - COVERAGE_INC(netdev_send_prepare_drops); > - VLOG_WARN_RL(&rl, "%s: Packet dropped: %s", > - netdev_get_name(netdev), errormsg); > - free(errormsg); > + if (dp_packet_batch_is_full(curr_batch)) { > + curr_batch++; > + } > + > + dp_packet_batch_add(curr_batch, packet); > } > } > + > + for (curr_batch = batches; curr_batch <= last_batch; curr_batch++) { > + DP_PACKET_BATCH_FOR_EACH (i, packet, curr_batch) { > + dp_packet_ol_send_prepare(packet, netdev->ol_flags); > + } > + > + error = netdev->netdev_class->send(netdev, qid, curr_batch, > + concurrent_txq); > + if (!error) { > + COVERAGE_INC(netdev_sent); > + } else { > + retval = error; > + } > + } > + free(batches); > + return retval; > } > > /* Sends 'batch' on 'netdev'. Returns 0 if successful (for every packet), > @@ -877,11 +899,21 @@ int > netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch, > bool concurrent_txq) > { > + const uint64_t netdev_flags = netdev->ol_flags; > + struct dp_packet *packet; > int error; > > - netdev_send_prepare_batch(netdev, batch); > - if (OVS_UNLIKELY(dp_packet_batch_is_empty(batch))) { > - return 0; > + if (userspace_tso_enabled() && > + !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) { > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > + if (dp_packet_hwol_is_tso(packet)) { > + return netdev_send_tso(netdev, qid, batch, concurrent_txq); > + } > + } > + } > + > + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { > + dp_packet_ol_send_prepare(packet, netdev_flags); > } > > error = netdev->netdev_class->send(netdev, qid, batch, concurrent_txq); > diff --git a/lib/packets.c b/lib/packets.c > index 462b51f92..dab823ba2 100644 > --- a/lib/packets.c > +++ b/lib/packets.c > @@ -427,7 +427,7 @@ add_mpls(struct dp_packet *packet, ovs_be16 ethtype, ovs_be32 lse, > } > > if (!l3_encap) { > - struct mpls_hdr *header = dp_packet_push_uninit(packet, MPLS_HLEN); > + struct mpls_hdr *header = dp_packet_resize_l2(packet, MPLS_HLEN); > > put_16aligned_be32(&header->mpls_lse, lse); > packet->l2_5_ofs = 0; > @@ -513,7 +513,7 @@ push_nsh(struct dp_packet *packet, const struct nsh_hdr *nsh_hdr_src) > OVS_NOT_REACHED(); > } > > - nsh = (struct nsh_hdr *) dp_packet_push_uninit(packet, length); > + nsh = (struct nsh_hdr *) dp_packet_resize_l2(packet, length); > memcpy(nsh, nsh_hdr_src, length); > nsh->next_proto = next_proto; > packet->packet_type = htonl(PT_NSH);
diff --git a/lib/automake.mk b/lib/automake.mk index e64ee76ce..49a92958d 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -118,6 +118,8 @@ lib_libopenvswitch_la_SOURCES = \ lib/dpctl.h \ lib/dp-packet.h \ lib/dp-packet.c \ + lib/dp-packet-gso.c \ + lib/dp-packet-gso.h \ lib/dpdk.h \ lib/dpif-netdev-extract-study.c \ lib/dpif-netdev-lookup.h \ diff --git a/lib/dp-packet-gso.c b/lib/dp-packet-gso.c new file mode 100644 index 000000000..6a007fdad --- /dev/null +++ b/lib/dp-packet-gso.c @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2023 Red Hat, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <config.h> +#include <stdlib.h> +#include <string.h> + +#include "coverage.h" +#include "dp-packet.h" +#include "dp-packet-gso.h" +#include "netdev-provider.h" +#include "openvswitch/vlog.h" + +VLOG_DEFINE_THIS_MODULE(dp_packet_gso); + +COVERAGE_DEFINE(soft_seg_good); +COVERAGE_DEFINE(soft_seg_drop); + +/* Retuns a new packet that is a segment of packet 'p'. + * + * The new packet is initialized with 'hdr_len' bytes from the + * start of packet 'p' and then appended with 'data_len' bytes + * from the 'data' buffer. + * + * Note: The packet headers are not updated. */ +static struct dp_packet * +dp_packet_gso_seg_new(const struct dp_packet *p, size_t hdr_len, + const char *data, size_t data_len) +{ + struct dp_packet *seg = dp_packet_new_with_headroom(hdr_len + data_len, + dp_packet_headroom(p)); + + /* Append the original packet headers and then the payload. */ + dp_packet_put(seg, dp_packet_data(p), hdr_len); + dp_packet_put(seg, data, data_len); + + /* The new segment should have the same offsets. */ + seg->l2_5_ofs = p->l2_5_ofs; + seg->l3_ofs = p->l3_ofs; + seg->l4_ofs = p->l4_ofs; + + /* The protocol headers remain the same, so preserve hash and mark. */ + *dp_packet_rss_ptr(seg) = dp_packet_get_rss_hash(p); + *dp_packet_flow_mark_ptr(seg) = *dp_packet_flow_mark_ptr(p); + + /* The segment should inherit all the offloading flags from the + * original packet, except for the TCP segmentation, external + * buffer and indirect buffer flags. */ + *dp_packet_ol_flags_ptr(seg) = *dp_packet_ol_flags_ptr(p) + & DP_PACKET_OL_SUPPORTED_MASK; + + dp_packet_hwol_reset_tcp_seg(seg); + + return seg; +} + +/* Returns the calculated number of TCP segments in packet 'p'. */ +int +dp_packet_gso_nr_segs(struct dp_packet *p) +{ + uint16_t segsz = dp_packet_get_tso_segsz(p); + const char *data_tail; + const char *data_pos; + + data_pos = dp_packet_get_tcp_payload(p); + data_tail = (char *) dp_packet_tail(p) - dp_packet_l2_pad_size(p); + + return DIV_ROUND_UP((data_tail - data_pos), segsz); + +} + +/* Perform software segmentation on packet 'p'. + * + * Returns all the segments added to the array of preallocated + * batches in 'batches' starting at batch position 'batch_pos'. */ +bool +dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches) +{ + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + struct dp_packet_batch *curr_batch = *batches; + struct tcp_header *tcp_hdr; + struct ip_header *ip_hdr; + struct dp_packet *seg; + uint16_t tcp_offset; + uint16_t tso_segsz; + uint32_t tcp_seq; + uint16_t ip_id; + int hdr_len; + int seg_len; + + tso_segsz = dp_packet_get_tso_segsz(p); + if (!tso_segsz) { + VLOG_WARN_RL(&rl, "GSO packet with len %d with no segment size.", + dp_packet_size(p)); + COVERAGE_INC(soft_seg_drop); + return false; + } + + tcp_hdr = dp_packet_l4(p); + tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl); + tcp_seq = ntohl(get_16aligned_be32(&tcp_hdr->tcp_seq)); + hdr_len = ((char *) dp_packet_l4(p) - (char *) dp_packet_eth(p)) + + tcp_offset * 4; + ip_id = 0; + if (dp_packet_hwol_is_ipv4(p)) { + ip_hdr = dp_packet_l3(p); + ip_id = ntohs(ip_hdr->ip_id); + } + + const char *data_tail = (char *) dp_packet_tail(p) + - dp_packet_l2_pad_size(p); + const char *data_pos = dp_packet_get_tcp_payload(p); + int n_segs = dp_packet_gso_nr_segs(p); + + for (int i = 0; i < n_segs; i++) { + seg_len = data_tail - data_pos; + if (seg_len > tso_segsz) { + seg_len = tso_segsz; + } + + seg = dp_packet_gso_seg_new(p, hdr_len, data_pos, seg_len); + data_pos += seg_len; + + /* Update L3 header. */ + if (dp_packet_hwol_is_ipv4(seg)) { + ip_hdr = dp_packet_l3(seg); + ip_hdr->ip_tot_len = htons(sizeof *ip_hdr + + dp_packet_l4_size(seg)); + ip_hdr->ip_id = htons(ip_id); + ip_hdr->ip_csum = 0; + ip_id++; + } else { + struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(seg); + + ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen + = htons(sizeof *ip_hdr + dp_packet_l4_size(seg)); + } + + /* Update L4 header. */ + tcp_hdr = dp_packet_l4(seg); + put_16aligned_be32(&tcp_hdr->tcp_seq, htonl(tcp_seq)); + tcp_seq += seg_len; + if (OVS_LIKELY(i < (n_segs - 1))) { + /* Reset flags PUSH and FIN unless it is the last segment. */ + uint16_t tcp_flags = TCP_FLAGS(tcp_hdr->tcp_ctl) + & ~(TCP_PSH | TCP_FIN); + tcp_hdr->tcp_ctl = TCP_CTL(tcp_flags, tcp_offset); + } + + if (dp_packet_batch_is_full(curr_batch)) { + curr_batch++; + } + + dp_packet_batch_add(curr_batch, seg); + } + + *batches = curr_batch; + COVERAGE_INC(soft_seg_good); + return true; +} diff --git a/lib/dp-packet-gso.h b/lib/dp-packet-gso.h new file mode 100644 index 000000000..9c282fb86 --- /dev/null +++ b/lib/dp-packet-gso.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2023 Red Hat, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DP_PACKET_GSO_H +#define DP_PACKET_GSO_H 1 + +bool dp_packet_gso(struct dp_packet *, struct dp_packet_batch **); +int dp_packet_gso_nr_segs(struct dp_packet *); + +#endif /* dp-packet-gso.h */ diff --git a/lib/dp-packet.h b/lib/dp-packet.h index 6a924f3ff..239d7a3d4 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -1131,6 +1131,13 @@ dp_packet_hwol_set_tcp_seg(struct dp_packet *b) *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TCP_SEG; } +/* Resets TCP Segmentation flag in packet 'p'. */ +static inline void +dp_packet_hwol_reset_tcp_seg(struct dp_packet *p) +{ + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_TCP_SEG; +} + /* Returns 'true' if the IP header has good integrity and the * checksum in it is complete. */ static inline bool diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 1c100c48e..8eede8a88 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -2288,6 +2288,7 @@ static bool netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf) { struct dp_packet *pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf); + struct tcp_header *th; if (!(mbuf->ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK | RTE_MBUF_F_TX_TCP_SEG))) { @@ -2299,27 +2300,36 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf) mbuf->l4_len = 0; mbuf->outer_l2_len = 0; mbuf->outer_l3_len = 0; + th = dp_packet_l4(pkt); if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) { - struct tcp_header *th = dp_packet_l4(pkt); - int hdr_len; - if (!th) { VLOG_WARN_RL(&rl, "%s: TCP Segmentation without L4 header" + " pkt len: %"PRIu32"", dev->up.name, mbuf->pkt_len); + return false; + } + } + + if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_CKSUM) { + if (!th) { + VLOG_WARN_RL(&rl, "%s: TCP offloading without L4 header" " pkt len: %"PRIu32"", dev->up.name, mbuf->pkt_len); return false; } mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4; - mbuf->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM; - hdr_len = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len; mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len; - if (OVS_UNLIKELY((hdr_len + mbuf->tso_segsz) > dev->max_packet_len)) { - VLOG_WARN_RL(&rl, "%s: Oversized TSO packet. " - "hdr: %"PRIu32", gso: %"PRIu32", max len: %"PRIu32"", - dev->up.name, hdr_len, mbuf->tso_segsz, - dev->max_packet_len); - return false; + + if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) { + int hdr_len = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len; + if (OVS_UNLIKELY((hdr_len + + mbuf->tso_segsz) > dev->max_packet_len)) { + VLOG_WARN_RL(&rl, "%s: Oversized TSO packet. hdr: %"PRIu32", " + "gso: %"PRIu32", max len: %"PRIu32"", + dev->up.name, hdr_len, mbuf->tso_segsz, + dev->max_packet_len); + return false; + } } if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV4) { @@ -2707,6 +2717,7 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet *pkt_orig) mbuf_dest->packet_type = pkt_orig->mbuf.packet_type; mbuf_dest->ol_flags |= (pkt_orig->mbuf.ol_flags & ~(RTE_MBUF_F_EXTERNAL | RTE_MBUF_F_INDIRECT)); + mbuf_dest->tso_segsz = pkt_orig->mbuf.tso_segsz; memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size, sizeof(struct dp_packet) - offsetof(struct dp_packet, l2_pad_size)); @@ -2765,11 +2776,20 @@ netdev_dpdk_common_send(struct netdev *netdev, struct dp_packet_batch *batch, struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets; struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); size_t cnt, pkt_cnt = dp_packet_batch_size(batch); + struct dp_packet *packet; + bool need_copy = false; memset(stats, 0, sizeof *stats); + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { + if (packet->source != DPBUF_DPDK) { + need_copy = true; + break; + } + } + /* Copy dp-packets to mbufs. */ - if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) { + if (OVS_UNLIKELY(need_copy)) { cnt = dpdk_copy_batch_to_mbuf(netdev, batch); stats->tx_failure_drops += pkt_cnt - cnt; pkt_cnt = cnt; diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 52ce4947d..7a92ee534 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -6849,55 +6849,6 @@ af_packet_sock(void) return sock; } -static int -netdev_linux_parse_l2(struct dp_packet *b, uint16_t *l4proto) -{ - struct eth_header *eth_hdr; - ovs_be16 eth_type; - int l2_len; - - eth_hdr = dp_packet_at(b, 0, ETH_HEADER_LEN); - if (!eth_hdr) { - return -EINVAL; - } - - l2_len = ETH_HEADER_LEN; - eth_type = eth_hdr->eth_type; - if (eth_type_vlan(eth_type)) { - struct vlan_header *vlan = dp_packet_at(b, l2_len, VLAN_HEADER_LEN); - - if (!vlan) { - return -EINVAL; - } - - eth_type = vlan->vlan_next_type; - l2_len += VLAN_HEADER_LEN; - } - - if (eth_type == htons(ETH_TYPE_IP)) { - struct ip_header *ip_hdr = dp_packet_at(b, l2_len, IP_HEADER_LEN); - - if (!ip_hdr) { - return -EINVAL; - } - - *l4proto = ip_hdr->ip_proto; - dp_packet_hwol_set_tx_ipv4(b); - } else if (eth_type == htons(ETH_TYPE_IPV6)) { - struct ovs_16aligned_ip6_hdr *nh6; - - nh6 = dp_packet_at(b, l2_len, IPV6_HEADER_LEN); - if (!nh6) { - return -EINVAL; - } - - *l4proto = nh6->ip6_ctlun.ip6_un1.ip6_un1_nxt; - dp_packet_hwol_set_tx_ipv6(b); - } - - return 0; -} - /* Initializes packet 'b' with features enabled in the prepended * struct virtio_net_hdr. Returns 0 if successful, otherwise a * positive errno value. */ @@ -6915,15 +6866,6 @@ netdev_linux_parse_vnet_hdr(struct dp_packet *b) } if (vnet->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) { - uint16_t l4proto = 0; - - if (netdev_linux_parse_l2(b, &l4proto)) { - return EINVAL; - } - - if (l4proto == IPPROTO_UDP) { - dp_packet_hwol_set_csum_udp(b); - } /* The packet has offloaded checksum. However, there is no * additional information like the protocol used, so it would * require to parse the packet here. The checksum starting point diff --git a/lib/netdev.c b/lib/netdev.c index 8df7f8737..00c8004d9 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -35,6 +35,7 @@ #include "coverage.h" #include "dpif.h" #include "dp-packet.h" +#include "dp-packet-gso.h" #include "openvswitch/dynamic-string.h" #include "fatal-signal.h" #include "hash.h" @@ -56,6 +57,7 @@ #include "svec.h" #include "openvswitch/vlog.h" #include "flow.h" +#include "userspace-tso.h" #include "util.h" #ifdef __linux__ #include "tc.h" @@ -67,7 +69,6 @@ COVERAGE_DEFINE(netdev_received); COVERAGE_DEFINE(netdev_sent); COVERAGE_DEFINE(netdev_add_router); COVERAGE_DEFINE(netdev_get_stats); -COVERAGE_DEFINE(netdev_send_prepare_drops); COVERAGE_DEFINE(netdev_push_header_drops); struct netdev_saved_flags { @@ -792,60 +793,81 @@ netdev_get_pt_mode(const struct netdev *netdev) : NETDEV_PT_LEGACY_L2); } -/* Check if a 'packet' is compatible with 'netdev_flags'. - * If a packet is incompatible, return 'false' with the 'errormsg' - * pointing to a reason. */ -static bool -netdev_send_prepare_packet(const uint64_t netdev_flags, - struct dp_packet *packet, char **errormsg) -{ - if (dp_packet_hwol_is_tso(packet) - && !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) { - /* Fall back to GSO in software. */ - VLOG_ERR_BUF(errormsg, "No TSO support"); - return false; - } - - /* Packet with IP csum offloading enabled was received with verified csum. - * Leave the IP csum offloading enabled even with good checksum to the - * netdev to decide what would be the best to do. - * Provide a software fallback in case the device doesn't support IP csum - * offloading. Note: Encapsulated packet must have the inner IP header - * csum already calculated. - * Packet with L4 csum offloading enabled was received with verified csum. - * Leave the L4 csum offloading enabled even with good checksum for the - * netdev to decide what would be the best to do. - * Netdev that requires pseudo header csum needs to calculate that. - * Provide a software fallback in case the netdev doesn't support L4 csum - * offloading. Note: Encapsulated packet must have the inner L4 header - * csum already calculated. */ - dp_packet_ol_send_prepare(packet, netdev_flags); - - return true; -} - -/* Check if each packet in 'batch' is compatible with 'netdev' features, - * otherwise either fall back to software implementation or drop it. */ -static void -netdev_send_prepare_batch(const struct netdev *netdev, - struct dp_packet_batch *batch) +/* Attempts to segment GSO flagged packets and send them as multiple bundles. + * This function is only used if at least one packet in the current batch is + * flagged for TSO and the netdev does not support this. + * + * The return value is 0 if all batches sent successfully, and an error code + * from netdev_class->send() if at least one batch failed to send. */ +static int +netdev_send_tso(struct netdev *netdev, int qid, + struct dp_packet_batch *batch, bool concurrent_txq) { + struct dp_packet_batch *batches; struct dp_packet *packet; - size_t i, size = dp_packet_batch_size(batch); + int retval = 0; + int n_packets; + int n_batches; + int error; - DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) { - char *errormsg = NULL; + /* Calculate the total number of packets in the batch after + * the segmentation. */ + n_packets = 0; + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { + if (dp_packet_hwol_is_tso(packet)) { + n_packets += dp_packet_gso_nr_segs(packet); + } else { + n_packets++; + } + } - if (netdev_send_prepare_packet(netdev->ol_flags, packet, &errormsg)) { - dp_packet_batch_refill(batch, packet, i); + if (!n_packets) { + return 0; + } + + /* Allocate enough batches to store all the packets in order. */ + n_batches = DIV_ROUND_UP(n_packets, NETDEV_MAX_BURST); + batches = xmalloc(n_batches * sizeof *batches); + + struct dp_packet_batch *curr_batch = batches; + struct dp_packet_batch *last_batch = &batches[n_batches - 1]; + for (curr_batch = batches; curr_batch < last_batch; curr_batch++) { + dp_packet_batch_init(curr_batch); + } + + /* Do the packet segmentation if TSO is flagged. */ + size_t size = dp_packet_batch_size(batch); + size_t k; + curr_batch = batches; + DP_PACKET_BATCH_REFILL_FOR_EACH (k, size, packet, batch) { + if (dp_packet_hwol_is_tso(packet)) { + if (!dp_packet_gso(packet, &curr_batch)) { + dp_packet_delete(packet); + } } else { - dp_packet_delete(packet); - COVERAGE_INC(netdev_send_prepare_drops); - VLOG_WARN_RL(&rl, "%s: Packet dropped: %s", - netdev_get_name(netdev), errormsg); - free(errormsg); + if (dp_packet_batch_is_full(curr_batch)) { + curr_batch++; + } + + dp_packet_batch_add(curr_batch, packet); } } + + for (curr_batch = batches; curr_batch <= last_batch; curr_batch++) { + DP_PACKET_BATCH_FOR_EACH (i, packet, curr_batch) { + dp_packet_ol_send_prepare(packet, netdev->ol_flags); + } + + error = netdev->netdev_class->send(netdev, qid, curr_batch, + concurrent_txq); + if (!error) { + COVERAGE_INC(netdev_sent); + } else { + retval = error; + } + } + free(batches); + return retval; } /* Sends 'batch' on 'netdev'. Returns 0 if successful (for every packet), @@ -877,11 +899,21 @@ int netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch, bool concurrent_txq) { + const uint64_t netdev_flags = netdev->ol_flags; + struct dp_packet *packet; int error; - netdev_send_prepare_batch(netdev, batch); - if (OVS_UNLIKELY(dp_packet_batch_is_empty(batch))) { - return 0; + if (userspace_tso_enabled() && + !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) { + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { + if (dp_packet_hwol_is_tso(packet)) { + return netdev_send_tso(netdev, qid, batch, concurrent_txq); + } + } + } + + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { + dp_packet_ol_send_prepare(packet, netdev_flags); } error = netdev->netdev_class->send(netdev, qid, batch, concurrent_txq); diff --git a/lib/packets.c b/lib/packets.c index 462b51f92..dab823ba2 100644 --- a/lib/packets.c +++ b/lib/packets.c @@ -427,7 +427,7 @@ add_mpls(struct dp_packet *packet, ovs_be16 ethtype, ovs_be32 lse, } if (!l3_encap) { - struct mpls_hdr *header = dp_packet_push_uninit(packet, MPLS_HLEN); + struct mpls_hdr *header = dp_packet_resize_l2(packet, MPLS_HLEN); put_16aligned_be32(&header->mpls_lse, lse); packet->l2_5_ofs = 0; @@ -513,7 +513,7 @@ push_nsh(struct dp_packet *packet, const struct nsh_hdr *nsh_hdr_src) OVS_NOT_REACHED(); } - nsh = (struct nsh_hdr *) dp_packet_push_uninit(packet, length); + nsh = (struct nsh_hdr *) dp_packet_resize_l2(packet, length); memcpy(nsh, nsh_hdr_src, length); nsh->next_proto = next_proto; packet->packet_type = htonl(PT_NSH);