Message ID | 1583795962-45820-1-git-send-email-u9012063@gmail.com |
---|---|
State | Superseded |
Headers | show |
Series | [ovs-dev,PATCHv3,1/2] userspace: Enable TSO support for non-DPDK. | expand |
Hi William, Any chance to get the function comments copied to the non-dpdk versions as well? I wonder if we could define DP_PACKET_OL.* defines as it is proposed in this patch if DPDK is not enabled, or define them as DPDK defines. For example: #ifdef DPDK DP_PACKET_OL_TX_TCP_SEG = PKT_TX_TCP_SEG #else DP_PACKET_OL_TX_TCP_SEG = 0x40 #endif Then struct dp_packet #ifdef DPDK_NETDEV struct rte_mbuf mbuf; /* DPDK mbuf */ #define ol_flags mbuf.ol_flags #else void *base_; /* First byte of allocated space. */ uint16_t allocated_; /* Number of bytes allocated. */ uint16_t data_ofs; /* First byte actually in use. */ uint32_t size_; /* Number of bytes in use. */ uint32_t ol_flags; /* Offloading flags. */ Then we would have a single: static inline bool dp_packet_hwol_is_tso(const struct dp_packet *b) { return !!(b->ol_flags & PKT_TX_TCP_SEG); } I haven't tried, so don't know if I am missing something or if it will look ugly, but it would save us many function copies. fbl On Mon, Mar 09, 2020 at 04:19:21PM -0700, William Tu wrote: > This patch enables TSO support for non-DPDK use cases, and > also add check-system-tso testsuite. Before TSO, we have to > disable checksum offload, allowing the kernel to calculate the > TCP/UDP packet checsum. With TSO, we can skip the checksum > validation by enabling checksum offload, and with large packet > size, we see better performance. > > Consider container to container use cases: > iperf3 -c (ns0) -> veth peer -> OVS -> veth peer -> iperf3 -s (ns1) > And I got around 6Gbps, similar to TSO with DPDK-enabled. > > Signed-off-by: William Tu <u9012063@gmail.com> > --- > v3: > - fix comments and some coding style > - add valgrind check > - travis: https://travis-ci.org/williamtu/ovs-travis/builds/660394007 > v2: > - add make check-system-tso test > - combine logging for dpdk and non-dpdk > - I'm surprised that most of the test cases passed. > This is due to few tests using tcp/udp, so it does not trigger > TSO. I saw only geneve/vxlan fails randomly, maybe we can > check it later. > --- > lib/dp-packet.h | 97 ++++++++++++++++++++++++++----------------- > lib/userspace-tso.c | 5 --- > tests/.gitignore | 3 ++ > tests/automake.mk | 21 ++++++++++ > tests/system-tso-macros.at | 31 ++++++++++++++ > tests/system-tso-testsuite.at | 26 ++++++++++++ > 6 files changed, 140 insertions(+), 43 deletions(-) > create mode 100644 tests/system-tso-macros.at > create mode 100644 tests/system-tso-testsuite.at > > diff --git a/lib/dp-packet.h b/lib/dp-packet.h > index 9f8991faad52..ece0dc5e54cd 100644 > --- a/lib/dp-packet.h > +++ b/lib/dp-packet.h > @@ -53,7 +53,27 @@ enum OVS_PACKED_ENUM dp_packet_source { > enum dp_packet_offload_mask { > DP_PACKET_OL_RSS_HASH_MASK = 0x1, /* Is the 'rss_hash' valid? */ > DP_PACKET_OL_FLOW_MARK_MASK = 0x2, /* Is the 'flow_mark' valid? */ > + DP_PACKET_OL_RX_L4_CKSUM_BAD = 0x4, /* Bad L4 checksum in the packet. */ > + DP_PACKET_OL_RX_IP_CKSUM_BAD = 0x8, /* Bad IP checksum in the packet. */ > + DP_PACKET_OL_RX_L4_CKSUM_GOOD = 0x10, /* Valid L4 checksum in the packet. > + */ > + DP_PACKET_OL_RX_IP_CKSUM_GOOD = 0x20, /* Valid IP checksum in the packet. > + */ > + DP_PACKET_OL_TX_TCP_SEG = 0x40, /* TCP Segmentation Offload. */ > + DP_PACKET_OL_TX_IPV4 = 0x80, /* Offloaded packet is IPv4. */ > + DP_PACKET_OL_TX_IPV6 = 0x100, /* Offloaded packet is IPv6. */ > + DP_PACKET_OL_TX_TCP_CKSUM = 0x200, /* Offload TCP checksum. */ > + DP_PACKET_OL_TX_UDP_CKSUM = 0x400, /* Offload UDP checksum. */ > + DP_PACKET_OL_TX_SCTP_CKSUM = 0x800, /* Offload SCTP checksum. */ > }; > + > +#define DP_PACKET_OL_TX_L4_MASK (DP_PACKET_OL_TX_TCP_CKSUM | \ > + DP_PACKET_OL_TX_UDP_CKSUM | \ > + DP_PACKET_OL_TX_SCTP_CKSUM) > +#define DP_PACKET_OL_RX_IP_CKSUM_MASK (DP_PACKET_OL_RX_IP_CKSUM_GOOD | \ > + DP_PACKET_OL_RX_IP_CKSUM_BAD) > +#define DP_PACKET_OL_RX_L4_CKSUM_MASK (DP_PACKET_OL_RX_L4_CKSUM_GOOD | \ > + DP_PACKET_OL_RX_L4_CKSUM_BAD) > #else > /* DPDK mbuf ol_flags that are not really an offload flags. These are mostly > * related to mbuf memory layout and OVS should not touch/clear them. */ > @@ -739,82 +759,79 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s) > b->allocated_ = s; > } > > -/* There are no implementation when not DPDK enabled datapath. */ > static inline bool > -dp_packet_hwol_is_tso(const struct dp_packet *b OVS_UNUSED) > +dp_packet_hwol_is_tso(const struct dp_packet *b) > { > - return false; > + return !!(b->ol_flags & DP_PACKET_OL_TX_TCP_SEG); > } > > -/* There are no implementation when not DPDK enabled datapath. */ > static inline bool > -dp_packet_hwol_is_ipv4(const struct dp_packet *b OVS_UNUSED) > +dp_packet_hwol_is_ipv4(const struct dp_packet *b) > { > - return false; > + return !!(b->ol_flags & DP_PACKET_OL_TX_IPV4); > } > > -/* There are no implementation when not DPDK enabled datapath. */ > static inline uint64_t > -dp_packet_hwol_l4_mask(const struct dp_packet *b OVS_UNUSED) > +dp_packet_hwol_l4_mask(const struct dp_packet *b) > { > - return 0; > + return b->ol_flags & DP_PACKET_OL_TX_L4_MASK; > } > > -/* There are no implementation when not DPDK enabled datapath. */ > static inline bool > -dp_packet_hwol_l4_is_tcp(const struct dp_packet *b OVS_UNUSED) > +dp_packet_hwol_l4_is_tcp(const struct dp_packet *b) > { > - return false; > + return (b->ol_flags & DP_PACKET_OL_TX_L4_MASK) == > + DP_PACKET_OL_TX_TCP_CKSUM; > } > > -/* There are no implementation when not DPDK enabled datapath. */ > static inline bool > -dp_packet_hwol_l4_is_udp(const struct dp_packet *b OVS_UNUSED) > +dp_packet_hwol_l4_is_udp(const struct dp_packet *b) > { > - return false; > + return (b->ol_flags & DP_PACKET_OL_TX_L4_MASK) == > + DP_PACKET_OL_TX_UDP_CKSUM; > } > > -/* There are no implementation when not DPDK enabled datapath. */ > static inline bool > -dp_packet_hwol_l4_is_sctp(const struct dp_packet *b OVS_UNUSED) > +dp_packet_hwol_l4_is_sctp(const struct dp_packet *b) > { > - return false; > + return (b->ol_flags & DP_PACKET_OL_TX_L4_MASK) == > + DP_PACKET_OL_TX_SCTP_CKSUM; > } > > -/* There are no implementation when not DPDK enabled datapath. */ > static inline void > -dp_packet_hwol_set_tx_ipv4(struct dp_packet *b OVS_UNUSED) > +dp_packet_hwol_set_tx_ipv4(struct dp_packet *b) > { > + b->ol_flags |= DP_PACKET_OL_TX_IPV4; > } > > -/* There are no implementation when not DPDK enabled datapath. */ > static inline void > -dp_packet_hwol_set_tx_ipv6(struct dp_packet *b OVS_UNUSED) > +dp_packet_hwol_set_tx_ipv6(struct dp_packet *b) > { > + b->ol_flags |= DP_PACKET_OL_TX_IPV6; > } > > -/* There are no implementation when not DPDK enabled datapath. */ > static inline void > -dp_packet_hwol_set_csum_tcp(struct dp_packet *b OVS_UNUSED) > +dp_packet_hwol_set_csum_tcp(struct dp_packet *b) > { > + b->ol_flags |= DP_PACKET_OL_TX_TCP_CKSUM; > } > > -/* There are no implementation when not DPDK enabled datapath. */ > static inline void > -dp_packet_hwol_set_csum_udp(struct dp_packet *b OVS_UNUSED) > +dp_packet_hwol_set_csum_udp(struct dp_packet *b) > { > + b->ol_flags |= DP_PACKET_OL_TX_UDP_CKSUM; > } > > -/* There are no implementation when not DPDK enabled datapath. */ > static inline void > -dp_packet_hwol_set_csum_sctp(struct dp_packet *b OVS_UNUSED) > +dp_packet_hwol_set_csum_sctp(struct dp_packet *b) > { > + b->ol_flags |= DP_PACKET_OL_TX_SCTP_CKSUM; > } > > -/* There are no implementation when not DPDK enabled datapath. */ > static inline void > -dp_packet_hwol_set_tcp_seg(struct dp_packet *b OVS_UNUSED) > +dp_packet_hwol_set_tcp_seg(struct dp_packet *b) > { > + b->ol_flags |= DP_PACKET_OL_TX_TCP_SEG; > } > > /* Returns the RSS hash of the packet 'p'. Note that the returned value is > @@ -845,27 +862,31 @@ dp_packet_reset_offload(struct dp_packet *p) > } > > static inline bool > -dp_packet_ip_checksum_valid(const struct dp_packet *p OVS_UNUSED) > +dp_packet_ip_checksum_valid(const struct dp_packet *p) > { > - return false; > + return (p->ol_flags & DP_PACKET_OL_RX_IP_CKSUM_MASK) == > + DP_PACKET_OL_RX_IP_CKSUM_GOOD; > } > > static inline bool > -dp_packet_ip_checksum_bad(const struct dp_packet *p OVS_UNUSED) > +dp_packet_ip_checksum_bad(const struct dp_packet *p) > { > - return false; > + return (p->ol_flags & DP_PACKET_OL_RX_IP_CKSUM_MASK) == > + DP_PACKET_OL_RX_IP_CKSUM_BAD; > } > > static inline bool > -dp_packet_l4_checksum_valid(const struct dp_packet *p OVS_UNUSED) > +dp_packet_l4_checksum_valid(const struct dp_packet *p) > { > - return false; > + return (p->ol_flags & DP_PACKET_OL_RX_L4_CKSUM_MASK) == > + DP_PACKET_OL_RX_L4_CKSUM_GOOD; > } > > static inline bool > -dp_packet_l4_checksum_bad(const struct dp_packet *p OVS_UNUSED) > +dp_packet_l4_checksum_bad(const struct dp_packet *p) > { > - return false; > + return (p->ol_flags & DP_PACKET_OL_RX_L4_CKSUM_MASK) == > + DP_PACKET_OL_RX_L4_CKSUM_BAD; > } > > static inline bool > diff --git a/lib/userspace-tso.c b/lib/userspace-tso.c > index 6a4a0149b7f5..f843c2a763ce 100644 > --- a/lib/userspace-tso.c > +++ b/lib/userspace-tso.c > @@ -34,13 +34,8 @@ userspace_tso_init(const struct smap *ovs_other_config) > static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; > > if (ovsthread_once_start(&once)) { > -#ifdef DPDK_NETDEV > VLOG_INFO("Userspace TCP Segmentation Offloading support enabled"); > userspace_tso = true; > -#else > - VLOG_WARN("Userspace TCP Segmentation Offloading can not be enabled" > - "since OVS is built without DPDK support."); > -#endif > ovsthread_once_done(&once); > } > } > diff --git a/tests/.gitignore b/tests/.gitignore > index 99fdf70d58cd..45b4f67b2a43 100644 > --- a/tests/.gitignore > +++ b/tests/.gitignore > @@ -24,6 +24,9 @@ > /system-userspace-testsuite > /system-userspace-testsuite.dir/ > /system-userspace-testsuite.log > +/system-tso-testsuite > +/system-tso-testsuite.dir/ > +/system-tso-testsuite.log > /system-offloads-testsuite > /system-offloads-testsuite.dir/ > /system-offloads-testsuite.log > diff --git a/tests/automake.mk b/tests/automake.mk > index 81eb2a9b8222..66859d5377c3 100644 > --- a/tests/automake.mk > +++ b/tests/automake.mk > @@ -4,6 +4,7 @@ EXTRA_DIST += \ > $(SYSTEM_TESTSUITE_AT) \ > $(SYSTEM_KMOD_TESTSUITE_AT) \ > $(SYSTEM_USERSPACE_TESTSUITE_AT) \ > + $(SYSTEM_TSO_TESTSUITE_AT) \ > $(SYSTEM_AFXDP_TESTSUITE_AT) \ > $(SYSTEM_OFFLOADS_TESTSUITE_AT) \ > $(SYSTEM_DPDK_TESTSUITE_AT) \ > @@ -11,6 +12,7 @@ EXTRA_DIST += \ > $(TESTSUITE) \ > $(SYSTEM_KMOD_TESTSUITE) \ > $(SYSTEM_USERSPACE_TESTSUITE) \ > + $(SYSTEM_TSO_TESTSUITE) \ > $(SYSTEM_AFXDP_TESTSUITE) \ > $(SYSTEM_OFFLOADS_TESTSUITE) \ > $(SYSTEM_DPDK_TESTSUITE) \ > @@ -154,6 +156,10 @@ SYSTEM_USERSPACE_TESTSUITE_AT = \ > tests/system-userspace-macros.at \ > tests/system-userspace-packet-type-aware.at > > +SYSTEM_TSO_TESTSUITE_AT = \ > + tests/system-tso-testsuite.at \ > + tests/system-tso-macros.at > + > SYSTEM_AFXDP_TESTSUITE_AT = \ > tests/system-userspace-macros.at \ > tests/system-afxdp-testsuite.at \ > @@ -183,6 +189,7 @@ TESTSUITE = $(srcdir)/tests/testsuite > TESTSUITE_PATCH = $(srcdir)/tests/testsuite.patch > SYSTEM_KMOD_TESTSUITE = $(srcdir)/tests/system-kmod-testsuite > SYSTEM_USERSPACE_TESTSUITE = $(srcdir)/tests/system-userspace-testsuite > +SYSTEM_TSO_TESTSUITE = $(srcdir)/tests/system-tso-testsuite > SYSTEM_AFXDP_TESTSUITE = $(srcdir)/tests/system-afxdp-testsuite > SYSTEM_OFFLOADS_TESTSUITE = $(srcdir)/tests/system-offloads-testsuite > SYSTEM_DPDK_TESTSUITE = $(srcdir)/tests/system-dpdk-testsuite > @@ -296,6 +303,12 @@ check-offloads-valgrind: all $(valgrind_wrappers) $(check_DATA) > @echo '----------------------------------------------------------------------' > @echo 'Valgrind output can be found in tests/system-offloads-testsuite.dir/*/valgrind.*' > @echo '----------------------------------------------------------------------' > +check-tso-valgrind: all $(valgrind_wrappers) $(check_DATA) > + $(SHELL) '$(SYSTEM_TSO_TESTSUITE)' -C tests VALGRIND='$(VALGRIND)' AUTOTEST_PATH='tests/valgrind:$(AUTOTEST_PATH)' -d $(TESTSUITEFLAGS) -j1 > + @echo > + @echo '----------------------------------------------------------------------' > + @echo 'Valgrind output can be found in tests/system-tso-testsuite.dir/*/valgrind.*' > + @echo '----------------------------------------------------------------------' > check-helgrind: all $(valgrind_wrappers) $(check_DATA) > -$(SHELL) '$(TESTSUITE)' -C tests CHECK_VALGRIND=true VALGRIND='$(HELGRIND)' AUTOTEST_PATH='tests/valgrind:$(AUTOTEST_PATH)' -d $(TESTSUITEFLAGS) > > @@ -326,6 +339,10 @@ check-system-userspace: all > set $(SHELL) '$(SYSTEM_USERSPACE_TESTSUITE)' -C tests AUTOTEST_PATH='$(AUTOTEST_PATH)'; \ > "$$@" $(TESTSUITEFLAGS) -j1 || (test X'$(RECHECK)' = Xyes && "$$@" --recheck) > > +check-system-tso: all > + set $(SHELL) '$(SYSTEM_TSO_TESTSUITE)' -C tests AUTOTEST_PATH='$(AUTOTEST_PATH)'; \ > + "$$@" $(TESTSUITEFLAGS) -j1 || (test X'$(RECHECK)' = Xyes && "$$@" --recheck) > + > check-afxdp: all > set $(SHELL) '$(SYSTEM_AFXDP_TESTSUITE)' -C tests AUTOTEST_PATH='$(AUTOTEST_PATH)' $(TESTSUITEFLAGS) -j1; \ > "$$@" || (test X'$(RECHECK)' = Xyes && "$$@" --recheck) > @@ -367,6 +384,10 @@ $(SYSTEM_USERSPACE_TESTSUITE): package.m4 $(SYSTEM_TESTSUITE_AT) $(SYSTEM_USERSP > $(AM_V_GEN)$(AUTOTEST) -I '$(srcdir)' -o $@.tmp $@.at > $(AM_V_at)mv $@.tmp $@ > > +$(SYSTEM_TSO_TESTSUITE): package.m4 $(SYSTEM_TESTSUITE_AT) $(SYSTEM_TSO_TESTSUITE_AT) $(COMMON_MACROS_AT) > + $(AM_V_GEN)$(AUTOTEST) -I '$(srcdir)' -o $@.tmp $@.at > + $(AM_V_at)mv $@.tmp $@ > + > $(SYSTEM_AFXDP_TESTSUITE): package.m4 $(SYSTEM_TESTSUITE_AT) $(SYSTEM_AFXDP_TESTSUITE_AT) $(COMMON_MACROS_AT) > $(AM_V_GEN)$(AUTOTEST) -I '$(srcdir)' -o $@.tmp $@.at > $(AM_V_at)mv $@.tmp $@ > diff --git a/tests/system-tso-macros.at b/tests/system-tso-macros.at > new file mode 100644 > index 000000000000..406334f3e081 > --- /dev/null > +++ b/tests/system-tso-macros.at > @@ -0,0 +1,31 @@ > +# _ADD_BR([name]) > +# > +# Expands into the proper ovs-vsctl commands to create a bridge with the > +# appropriate type and properties > +m4_define([_ADD_BR], [[add-br $1 -- set Bridge $1 datapath_type="netdev" protocols=OpenFlow10,OpenFlow11,OpenFlow12,OpenFlow13,OpenFlow14,OpenFlow15 fail-mode=secure ]]) > + > +# OVS_TRAFFIC_VSWITCHD_START([vsctl-args], [vsctl-output], [=override]) > +# > +# Creates a database and starts ovsdb-server, starts ovs-vswitchd > +# connected to that database, calls ovs-vsctl to create a bridge named > +# br0 with predictable settings, passing 'vsctl-args' as additional > +# commands to ovs-vsctl. If 'vsctl-args' causes ovs-vsctl to provide > +# output (e.g. because it includes "create" commands) then 'vsctl-output' > +# specifies the expected output after filtering through uuidfilt. > +m4_define([OVS_TRAFFIC_VSWITCHD_START], > + [ > + OVS_WAIT_WHILE([ip link show ovs-netdev]) > + _OVS_VSWITCHD_START([--disable-system]) > + dnl Add bridges, ports, etc. > + OVS_WAIT_WHILE([ip link show br0]) > + AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:userspace-tso-enable=true]) > + AT_CHECK([ovs-vsctl -- _ADD_BR([br0]) -- $1 m4_if([$2], [], [], [| uuidfilt])], [0], [$2]) > +]) > + > +# CONFIGURE_VETH_OFFLOADS([VETH]) > +# > +# Enable TCP segmentation offload and scatter-gather for veths. > +m4_define([CONFIGURE_VETH_OFFLOADS], > + [AT_CHECK([ethtool -K $1 sg on], [0], [ignore], [ignore])] > + [AT_CHECK([ethtool -K $1 tso on], [0], [ignore], [ignore])] > +) > diff --git a/tests/system-tso-testsuite.at b/tests/system-tso-testsuite.at > new file mode 100644 > index 000000000000..99d748006a86 > --- /dev/null > +++ b/tests/system-tso-testsuite.at > @@ -0,0 +1,26 @@ > +AT_INIT > + > +AT_COPYRIGHT([Copyright (c) 2020 VMware, Inc. > + > +Licensed under the Apache License, Version 2.0 (the "License"); > +you may not use this file except in compliance with the License. > +You may obtain a copy of the License at: > + > + http://www.apache.org/licenses/LICENSE-2.0 > + > +Unless required by applicable law or agreed to in writing, software > +distributed under the License is distributed on an "AS IS" BASIS, > +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > +See the License for the specific language governing permissions and > +limitations under the License.]) > + > +m4_ifdef([AT_COLOR_TESTS], [AT_COLOR_TESTS]) > + > +m4_include([tests/ovs-macros.at]) > +m4_include([tests/ovsdb-macros.at]) > +m4_include([tests/ofproto-macros.at]) > +m4_include([tests/system-common-macros.at]) > +m4_include([tests/system-userspace-macros.at]) > +m4_include([tests/system-tso-macros.at]) > + > +m4_include([tests/system-traffic.at]) > -- > 2.7.4 >
On 3/10/20 10:01 PM, Flavio Leitner wrote: > > Hi William, > > Any chance to get the function comments copied to the non-dpdk > versions as well? > > I wonder if we could define DP_PACKET_OL.* defines as it is > proposed in this patch if DPDK is not enabled, or define them > as DPDK defines. For example: > > #ifdef DPDK > DP_PACKET_OL_TX_TCP_SEG = PKT_TX_TCP_SEG > #else > DP_PACKET_OL_TX_TCP_SEG = 0x40 > #endif > > Then > struct dp_packet > #ifdef DPDK_NETDEV > struct rte_mbuf mbuf; /* DPDK mbuf */ > #define ol_flags mbuf.ol_flags > #else > void *base_; /* First byte of allocated space. */ > uint16_t allocated_; /* Number of bytes allocated. */ > uint16_t data_ofs; /* First byte actually in use. */ > uint32_t size_; /* Number of bytes in use. */ > uint32_t ol_flags; /* Offloading flags. */ > > > > Then we would have a single: > static inline bool > dp_packet_hwol_is_tso(const struct dp_packet *b) > { > return !!(b->ol_flags & PKT_TX_TCP_SEG); > } > > I haven't tried, so don't know if I am missing something or if it > will look ugly, but it would save us many function copies. Interesting. I generally like the idea except the ' #define ol_flags' part. We should define something that could not be used accidentally, e.g. #ifdef DPDK_NETDEV #define DP_PACKET_OL_FLAGS_FIELD mbuf.ol_flags #else #define DP_PACKET_OL_FLAGS_FIELD ol_flags #endif ... return !!(b->DP_PACKET_OL_FLAGS_FIELD & PKT_TX_TCP_SEG); Or, better write the inline access function that should look much more clean: static inline uint32_t * dp_packet_ol_flags_get(const struct dp_packet *b) { #ifdef DPDK_NETDEV return &b->mbuf.ol_flags; #else return &b->ol_flags; #endif } ... return !!(*dp_packet_ol_flags_get(b) & PKT_TX_TCP_SEG); ... *dp_packet_ol_flags_get(b) |= PKT_TX_SCTP_CKSUM; ... What do you think? Defines of flags might be shortened this way: #ifdef DPDK_NETDEV #define DEF_OL_FLAG(NAME, DPDK_DEF, GENERIC_DEF) NAME = DPDK_DEF #else #define DEF_OL_FLAG(NAME, DPDK_DEF, GENERIC_DEF) NAME = GENERIC_DEF #endif enum dp_packet_offload_mask { ... /* Valid IP checksum in the packet. */ DEF_OL_FLAG(DP_PACKET_OL_RX_IP_CKSUM_GOOD, PKT_RX_IP_CKSUM_GOOD, 0x20), /* TCP Segmentation Offload. */ DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_SEG, PKT_TX_TCP_SEG, 0x40), ... }; Best regards, Ilya Maximets.
On Wed, Mar 11, 2020 at 11:47:22AM +0100, Ilya Maximets wrote: > On 3/10/20 10:01 PM, Flavio Leitner wrote: > > > > Hi William, > > > > Any chance to get the function comments copied to the non-dpdk > > versions as well? > > > > I wonder if we could define DP_PACKET_OL.* defines as it is > > proposed in this patch if DPDK is not enabled, or define them > > as DPDK defines. For example: > > > > #ifdef DPDK > > DP_PACKET_OL_TX_TCP_SEG = PKT_TX_TCP_SEG > > #else > > DP_PACKET_OL_TX_TCP_SEG = 0x40 > > #endif > > > > Then > > struct dp_packet > > #ifdef DPDK_NETDEV > > struct rte_mbuf mbuf; /* DPDK mbuf */ > > #define ol_flags mbuf.ol_flags > > #else > > void *base_; /* First byte of allocated space. */ > > uint16_t allocated_; /* Number of bytes allocated. */ > > uint16_t data_ofs; /* First byte actually in use. */ > > uint32_t size_; /* Number of bytes in use. */ > > uint32_t ol_flags; /* Offloading flags. */ > > > > > > > > Then we would have a single: > > static inline bool > > dp_packet_hwol_is_tso(const struct dp_packet *b) > > { > > return !!(b->ol_flags & PKT_TX_TCP_SEG); > > } > > > > I haven't tried, so don't know if I am missing something or if it > > will look ugly, but it would save us many function copies. > > > Interesting. I generally like the idea except the ' #define ol_flags' > part. We should define something that could not be used accidentally, > e.g. > > #ifdef DPDK_NETDEV > #define DP_PACKET_OL_FLAGS_FIELD mbuf.ol_flags > #else > #define DP_PACKET_OL_FLAGS_FIELD ol_flags > #endif > ... > return !!(b->DP_PACKET_OL_FLAGS_FIELD & PKT_TX_TCP_SEG); > > > Or, better write the inline access function that should look much > more clean: > > static inline uint32_t * > dp_packet_ol_flags_get(const struct dp_packet *b) > { > #ifdef DPDK_NETDEV > return &b->mbuf.ol_flags; > #else > return &b->ol_flags; > #endif > } > ... > return !!(*dp_packet_ol_flags_get(b) & PKT_TX_TCP_SEG); > ... > *dp_packet_ol_flags_get(b) |= PKT_TX_SCTP_CKSUM; > ... > > > What do you think? Then we will have cases like this just to set another flag: dp_packet_ol_flags_set(b, dp_packet_ol_flags_get(b) | FLAG); I suspect that the define you proposed is better or the original one. > Defines of flags might be shortened this way: > > #ifdef DPDK_NETDEV > #define DEF_OL_FLAG(NAME, DPDK_DEF, GENERIC_DEF) NAME = DPDK_DEF > #else > #define DEF_OL_FLAG(NAME, DPDK_DEF, GENERIC_DEF) NAME = GENERIC_DEF > #endif > > enum dp_packet_offload_mask { > ... > /* Valid IP checksum in the packet. */ > DEF_OL_FLAG(DP_PACKET_OL_RX_IP_CKSUM_GOOD, PKT_RX_IP_CKSUM_GOOD, 0x20), > /* TCP Segmentation Offload. */ > DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_SEG, PKT_TX_TCP_SEG, 0x40), > ... > }; Exactly, looks better that way. Thanks,
On 3/11/20 1:13 PM, Flavio Leitner wrote: > On Wed, Mar 11, 2020 at 11:47:22AM +0100, Ilya Maximets wrote: >> On 3/10/20 10:01 PM, Flavio Leitner wrote: >>> >>> Hi William, >>> >>> Any chance to get the function comments copied to the non-dpdk >>> versions as well? >>> >>> I wonder if we could define DP_PACKET_OL.* defines as it is >>> proposed in this patch if DPDK is not enabled, or define them >>> as DPDK defines. For example: >>> >>> #ifdef DPDK >>> DP_PACKET_OL_TX_TCP_SEG = PKT_TX_TCP_SEG >>> #else >>> DP_PACKET_OL_TX_TCP_SEG = 0x40 >>> #endif >>> >>> Then >>> struct dp_packet >>> #ifdef DPDK_NETDEV >>> struct rte_mbuf mbuf; /* DPDK mbuf */ >>> #define ol_flags mbuf.ol_flags >>> #else >>> void *base_; /* First byte of allocated space. */ >>> uint16_t allocated_; /* Number of bytes allocated. */ >>> uint16_t data_ofs; /* First byte actually in use. */ >>> uint32_t size_; /* Number of bytes in use. */ >>> uint32_t ol_flags; /* Offloading flags. */ >>> >>> >>> >>> Then we would have a single: >>> static inline bool >>> dp_packet_hwol_is_tso(const struct dp_packet *b) >>> { >>> return !!(b->ol_flags & PKT_TX_TCP_SEG); >>> } >>> >>> I haven't tried, so don't know if I am missing something or if it >>> will look ugly, but it would save us many function copies. >> >> >> Interesting. I generally like the idea except the ' #define ol_flags' >> part. We should define something that could not be used accidentally, >> e.g. >> >> #ifdef DPDK_NETDEV >> #define DP_PACKET_OL_FLAGS_FIELD mbuf.ol_flags >> #else >> #define DP_PACKET_OL_FLAGS_FIELD ol_flags >> #endif >> ... >> return !!(b->DP_PACKET_OL_FLAGS_FIELD & PKT_TX_TCP_SEG); >> >> >> Or, better write the inline access function that should look much >> more clean: >> >> static inline uint32_t * >> dp_packet_ol_flags_get(const struct dp_packet *b) >> { >> #ifdef DPDK_NETDEV >> return &b->mbuf.ol_flags; >> #else >> return &b->ol_flags; >> #endif >> } >> ... >> return !!(*dp_packet_ol_flags_get(b) & PKT_TX_TCP_SEG); >> ... >> *dp_packet_ol_flags_get(b) |= PKT_TX_SCTP_CKSUM; >> ... >> >> >> What do you think? > > Then we will have cases like this just to set another flag: > dp_packet_ol_flags_set(b, dp_packet_ol_flags_get(b) | FLAG); > > I suspect that the define you proposed is better or the original > one. I didn't propose the _set() function. See example of flag setting few lines above. > > >> Defines of flags might be shortened this way: >> >> #ifdef DPDK_NETDEV >> #define DEF_OL_FLAG(NAME, DPDK_DEF, GENERIC_DEF) NAME = DPDK_DEF >> #else >> #define DEF_OL_FLAG(NAME, DPDK_DEF, GENERIC_DEF) NAME = GENERIC_DEF >> #endif >> >> enum dp_packet_offload_mask { >> ... >> /* Valid IP checksum in the packet. */ >> DEF_OL_FLAG(DP_PACKET_OL_RX_IP_CKSUM_GOOD, PKT_RX_IP_CKSUM_GOOD, 0x20), >> /* TCP Segmentation Offload. */ >> DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_SEG, PKT_TX_TCP_SEG, 0x40), >> ... >> }; > > Exactly, looks better that way. > > Thanks, >
On Wed, Mar 11, 2020 at 01:30:32PM +0100, Ilya Maximets wrote: > On 3/11/20 1:13 PM, Flavio Leitner wrote: > > On Wed, Mar 11, 2020 at 11:47:22AM +0100, Ilya Maximets wrote: > >> On 3/10/20 10:01 PM, Flavio Leitner wrote: > >>> > >>> Hi William, > >>> > >>> Any chance to get the function comments copied to the non-dpdk > >>> versions as well? > >>> > >>> I wonder if we could define DP_PACKET_OL.* defines as it is > >>> proposed in this patch if DPDK is not enabled, or define them > >>> as DPDK defines. For example: > >>> > >>> #ifdef DPDK > >>> DP_PACKET_OL_TX_TCP_SEG = PKT_TX_TCP_SEG > >>> #else > >>> DP_PACKET_OL_TX_TCP_SEG = 0x40 > >>> #endif > >>> > >>> Then > >>> struct dp_packet > >>> #ifdef DPDK_NETDEV > >>> struct rte_mbuf mbuf; /* DPDK mbuf */ > >>> #define ol_flags mbuf.ol_flags > >>> #else > >>> void *base_; /* First byte of allocated space. */ > >>> uint16_t allocated_; /* Number of bytes allocated. */ > >>> uint16_t data_ofs; /* First byte actually in use. */ > >>> uint32_t size_; /* Number of bytes in use. */ > >>> uint32_t ol_flags; /* Offloading flags. */ > >>> > >>> > >>> > >>> Then we would have a single: > >>> static inline bool > >>> dp_packet_hwol_is_tso(const struct dp_packet *b) > >>> { > >>> return !!(b->ol_flags & PKT_TX_TCP_SEG); > >>> } > >>> > >>> I haven't tried, so don't know if I am missing something or if it > >>> will look ugly, but it would save us many function copies. > >> > >> > >> Interesting. I generally like the idea except the ' #define ol_flags' > >> part. We should define something that could not be used accidentally, > >> e.g. > >> > >> #ifdef DPDK_NETDEV > >> #define DP_PACKET_OL_FLAGS_FIELD mbuf.ol_flags > >> #else > >> #define DP_PACKET_OL_FLAGS_FIELD ol_flags > >> #endif > >> ... > >> return !!(b->DP_PACKET_OL_FLAGS_FIELD & PKT_TX_TCP_SEG); > >> > >> > >> Or, better write the inline access function that should look much > >> more clean: > >> > >> static inline uint32_t * > >> dp_packet_ol_flags_get(const struct dp_packet *b) > >> { > >> #ifdef DPDK_NETDEV > >> return &b->mbuf.ol_flags; > >> #else > >> return &b->ol_flags; > >> #endif > >> } > >> ... > >> return !!(*dp_packet_ol_flags_get(b) & PKT_TX_TCP_SEG); > >> ... > >> *dp_packet_ol_flags_get(b) |= PKT_TX_SCTP_CKSUM; > >> ... > >> > >> > >> What do you think? > > > > Then we will have cases like this just to set another flag: > > dp_packet_ol_flags_set(b, dp_packet_ol_flags_get(b) | FLAG); > > > > I suspect that the define you proposed is better or the original > > one. > > I didn't propose the _set() function. See example of flag setting > few lines above. I meant that we would need a 'set' either by a function/define or other unusual way. IMO, '*dp_packet_ol_flags_get(b) |= ' isn't usual.
On 3/11/20 1:37 PM, Flavio Leitner wrote: > On Wed, Mar 11, 2020 at 01:30:32PM +0100, Ilya Maximets wrote: >> On 3/11/20 1:13 PM, Flavio Leitner wrote: >>> On Wed, Mar 11, 2020 at 11:47:22AM +0100, Ilya Maximets wrote: >>>> On 3/10/20 10:01 PM, Flavio Leitner wrote: >>>>> >>>>> Hi William, >>>>> >>>>> Any chance to get the function comments copied to the non-dpdk >>>>> versions as well? >>>>> >>>>> I wonder if we could define DP_PACKET_OL.* defines as it is >>>>> proposed in this patch if DPDK is not enabled, or define them >>>>> as DPDK defines. For example: >>>>> >>>>> #ifdef DPDK >>>>> DP_PACKET_OL_TX_TCP_SEG = PKT_TX_TCP_SEG >>>>> #else >>>>> DP_PACKET_OL_TX_TCP_SEG = 0x40 >>>>> #endif >>>>> >>>>> Then >>>>> struct dp_packet >>>>> #ifdef DPDK_NETDEV >>>>> struct rte_mbuf mbuf; /* DPDK mbuf */ >>>>> #define ol_flags mbuf.ol_flags >>>>> #else >>>>> void *base_; /* First byte of allocated space. */ >>>>> uint16_t allocated_; /* Number of bytes allocated. */ >>>>> uint16_t data_ofs; /* First byte actually in use. */ >>>>> uint32_t size_; /* Number of bytes in use. */ >>>>> uint32_t ol_flags; /* Offloading flags. */ >>>>> >>>>> >>>>> >>>>> Then we would have a single: >>>>> static inline bool >>>>> dp_packet_hwol_is_tso(const struct dp_packet *b) >>>>> { >>>>> return !!(b->ol_flags & PKT_TX_TCP_SEG); >>>>> } >>>>> >>>>> I haven't tried, so don't know if I am missing something or if it >>>>> will look ugly, but it would save us many function copies. >>>> >>>> >>>> Interesting. I generally like the idea except the ' #define ol_flags' >>>> part. We should define something that could not be used accidentally, >>>> e.g. >>>> >>>> #ifdef DPDK_NETDEV >>>> #define DP_PACKET_OL_FLAGS_FIELD mbuf.ol_flags >>>> #else >>>> #define DP_PACKET_OL_FLAGS_FIELD ol_flags >>>> #endif >>>> ... >>>> return !!(b->DP_PACKET_OL_FLAGS_FIELD & PKT_TX_TCP_SEG); >>>> >>>> >>>> Or, better write the inline access function that should look much >>>> more clean: >>>> >>>> static inline uint32_t * >>>> dp_packet_ol_flags_get(const struct dp_packet *b) >>>> { >>>> #ifdef DPDK_NETDEV >>>> return &b->mbuf.ol_flags; >>>> #else >>>> return &b->ol_flags; >>>> #endif >>>> } >>>> ... >>>> return !!(*dp_packet_ol_flags_get(b) & PKT_TX_TCP_SEG); >>>> ... >>>> *dp_packet_ol_flags_get(b) |= PKT_TX_SCTP_CKSUM; >>>> ... >>>> >>>> >>>> What do you think? >>> >>> Then we will have cases like this just to set another flag: >>> dp_packet_ol_flags_set(b, dp_packet_ol_flags_get(b) | FLAG); >>> >>> I suspect that the define you proposed is better or the original >>> one. >> >> I didn't propose the _set() function. See example of flag setting >> few lines above. > > I meant that we would need a 'set' either by a function/define or > other unusual way. IMO, '*dp_packet_ol_flags_get(b) |= ' isn't usual. > I don't insist. Just a note that such constructions are not very unusual. In OVS coverage counters and per-thread static data implemented this way. There are a lot of examples in Linux kernel, e.g. per_cpu_ptr(). '_get()' might be replaced with '_ptr()' for readability, though.
On Wed, Mar 11, 2020 at 02:13:07PM +0100, Ilya Maximets wrote: > On 3/11/20 1:37 PM, Flavio Leitner wrote: > > On Wed, Mar 11, 2020 at 01:30:32PM +0100, Ilya Maximets wrote: > >> On 3/11/20 1:13 PM, Flavio Leitner wrote: > >>> On Wed, Mar 11, 2020 at 11:47:22AM +0100, Ilya Maximets wrote: > >>>> On 3/10/20 10:01 PM, Flavio Leitner wrote: > >>>>> > >>>>> Hi William, > >>>>> > >>>>> Any chance to get the function comments copied to the non-dpdk > >>>>> versions as well? > >>>>> > >>>>> I wonder if we could define DP_PACKET_OL.* defines as it is > >>>>> proposed in this patch if DPDK is not enabled, or define them > >>>>> as DPDK defines. For example: > >>>>> > >>>>> #ifdef DPDK > >>>>> DP_PACKET_OL_TX_TCP_SEG = PKT_TX_TCP_SEG > >>>>> #else > >>>>> DP_PACKET_OL_TX_TCP_SEG = 0x40 > >>>>> #endif > >>>>> > >>>>> Then > >>>>> struct dp_packet > >>>>> #ifdef DPDK_NETDEV > >>>>> struct rte_mbuf mbuf; /* DPDK mbuf */ > >>>>> #define ol_flags mbuf.ol_flags > >>>>> #else > >>>>> void *base_; /* First byte of allocated space. */ > >>>>> uint16_t allocated_; /* Number of bytes allocated. */ > >>>>> uint16_t data_ofs; /* First byte actually in use. */ > >>>>> uint32_t size_; /* Number of bytes in use. */ > >>>>> uint32_t ol_flags; /* Offloading flags. */ > >>>>> > >>>>> > >>>>> > >>>>> Then we would have a single: > >>>>> static inline bool > >>>>> dp_packet_hwol_is_tso(const struct dp_packet *b) > >>>>> { > >>>>> return !!(b->ol_flags & PKT_TX_TCP_SEG); > >>>>> } > >>>>> > >>>>> I haven't tried, so don't know if I am missing something or if it > >>>>> will look ugly, but it would save us many function copies. > >>>> > >>>> > >>>> Interesting. I generally like the idea except the ' #define ol_flags' > >>>> part. We should define something that could not be used accidentally, > >>>> e.g. > >>>> > >>>> #ifdef DPDK_NETDEV > >>>> #define DP_PACKET_OL_FLAGS_FIELD mbuf.ol_flags > >>>> #else > >>>> #define DP_PACKET_OL_FLAGS_FIELD ol_flags > >>>> #endif > >>>> ... > >>>> return !!(b->DP_PACKET_OL_FLAGS_FIELD & PKT_TX_TCP_SEG); > >>>> > >>>> > >>>> Or, better write the inline access function that should look much > >>>> more clean: > >>>> > >>>> static inline uint32_t * > >>>> dp_packet_ol_flags_get(const struct dp_packet *b) > >>>> { > >>>> #ifdef DPDK_NETDEV > >>>> return &b->mbuf.ol_flags; > >>>> #else > >>>> return &b->ol_flags; > >>>> #endif > >>>> } > >>>> ... > >>>> return !!(*dp_packet_ol_flags_get(b) & PKT_TX_TCP_SEG); > >>>> ... > >>>> *dp_packet_ol_flags_get(b) |= PKT_TX_SCTP_CKSUM; > >>>> ... > >>>> > >>>> > >>>> What do you think? > >>> > >>> Then we will have cases like this just to set another flag: > >>> dp_packet_ol_flags_set(b, dp_packet_ol_flags_get(b) | FLAG); > >>> > >>> I suspect that the define you proposed is better or the original > >>> one. > >> > >> I didn't propose the _set() function. See example of flag setting > >> few lines above. > > > > I meant that we would need a 'set' either by a function/define or > > other unusual way. IMO, '*dp_packet_ol_flags_get(b) |= ' isn't usual. > > I don't insist. Just a note that such constructions are not very unusual. > In OVS coverage counters and per-thread static data implemented this way. > There are a lot of examples in Linux kernel, e.g. per_cpu_ptr(). > '_get()' might be replaced with '_ptr()' for readability, though. Definitely the _ptr() makes a difference. Another approach is using union: struct dp_packet { #ifdef DPDK struct mbuf mbuf; #else ... union { int ol_flags; } mbuf; ... #endif }; Then we always have: packet->mbuf.ol_flags = 0x10; I don't have a strong opinion. Thanks,
On Wed, Mar 11, 2020 at 1:22 PM Flavio Leitner <fbl@sysclose.org> wrote: > > On Wed, Mar 11, 2020 at 02:13:07PM +0100, Ilya Maximets wrote: > > On 3/11/20 1:37 PM, Flavio Leitner wrote: > > > On Wed, Mar 11, 2020 at 01:30:32PM +0100, Ilya Maximets wrote: > > >> On 3/11/20 1:13 PM, Flavio Leitner wrote: > > >>> On Wed, Mar 11, 2020 at 11:47:22AM +0100, Ilya Maximets wrote: > > >>>> On 3/10/20 10:01 PM, Flavio Leitner wrote: > > >>>>> > > >>>>> Hi William, > > >>>>> > > >>>>> Any chance to get the function comments copied to the non-dpdk > > >>>>> versions as well? > > >>>>> > > >>>>> I wonder if we could define DP_PACKET_OL.* defines as it is > > >>>>> proposed in this patch if DPDK is not enabled, or define them > > >>>>> as DPDK defines. For example: > > >>>>> > > >>>>> #ifdef DPDK > > >>>>> DP_PACKET_OL_TX_TCP_SEG = PKT_TX_TCP_SEG > > >>>>> #else > > >>>>> DP_PACKET_OL_TX_TCP_SEG = 0x40 > > >>>>> #endif > > >>>>> > > >>>>> Then > > >>>>> struct dp_packet > > >>>>> #ifdef DPDK_NETDEV > > >>>>> struct rte_mbuf mbuf; /* DPDK mbuf */ > > >>>>> #define ol_flags mbuf.ol_flags > > >>>>> #else > > >>>>> void *base_; /* First byte of allocated space. */ > > >>>>> uint16_t allocated_; /* Number of bytes allocated. */ > > >>>>> uint16_t data_ofs; /* First byte actually in use. */ > > >>>>> uint32_t size_; /* Number of bytes in use. */ > > >>>>> uint32_t ol_flags; /* Offloading flags. */ > > >>>>> > > >>>>> > > >>>>> > > >>>>> Then we would have a single: > > >>>>> static inline bool > > >>>>> dp_packet_hwol_is_tso(const struct dp_packet *b) > > >>>>> { > > >>>>> return !!(b->ol_flags & PKT_TX_TCP_SEG); > > >>>>> } > > >>>>> > > >>>>> I haven't tried, so don't know if I am missing something or if it > > >>>>> will look ugly, but it would save us many function copies. > > >>>> > > >>>> > > >>>> Interesting. I generally like the idea except the ' #define ol_flags' > > >>>> part. We should define something that could not be used accidentally, > > >>>> e.g. > > >>>> > > >>>> #ifdef DPDK_NETDEV > > >>>> #define DP_PACKET_OL_FLAGS_FIELD mbuf.ol_flags > > >>>> #else > > >>>> #define DP_PACKET_OL_FLAGS_FIELD ol_flags > > >>>> #endif > > >>>> ... > > >>>> return !!(b->DP_PACKET_OL_FLAGS_FIELD & PKT_TX_TCP_SEG); > > >>>> > > >>>> > > >>>> Or, better write the inline access function that should look much > > >>>> more clean: > > >>>> > > >>>> static inline uint32_t * > > >>>> dp_packet_ol_flags_get(const struct dp_packet *b) > > >>>> { > > >>>> #ifdef DPDK_NETDEV > > >>>> return &b->mbuf.ol_flags; > > >>>> #else > > >>>> return &b->ol_flags; > > >>>> #endif > > >>>> } > > >>>> ... > > >>>> return !!(*dp_packet_ol_flags_get(b) & PKT_TX_TCP_SEG); > > >>>> ... > > >>>> *dp_packet_ol_flags_get(b) |= PKT_TX_SCTP_CKSUM; > > >>>> ... > > >>>> > > >>>> > > >>>> What do you think? > > >>> > > >>> Then we will have cases like this just to set another flag: > > >>> dp_packet_ol_flags_set(b, dp_packet_ol_flags_get(b) | FLAG); > > >>> > > >>> I suspect that the define you proposed is better or the original > > >>> one. > > >> > > >> I didn't propose the _set() function. See example of flag setting > > >> few lines above. > > > > > > I meant that we would need a 'set' either by a function/define or > > > other unusual way. IMO, '*dp_packet_ol_flags_get(b) |= ' isn't usual. > > > > I don't insist. Just a note that such constructions are not very unusual. > > In OVS coverage counters and per-thread static data implemented this way. > > There are a lot of examples in Linux kernel, e.g. per_cpu_ptr(). > > '_get()' might be replaced with '_ptr()' for readability, though. > > Definitely the _ptr() makes a difference. > > Another approach is using union: > struct dp_packet { > #ifdef DPDK > struct mbuf mbuf; > #else > ... > union { > int ol_flags; > } mbuf; > ... > #endif > }; > > Then we always have: > packet->mbuf.ol_flags = 0x10; This might mislead people that DPDK mbuf is being used. > > I don't have a strong opinion. > Thanks, > -- > fbl Hi Flavio and Ilya, Thanks a lot for these valuable feedbacks. I will work on the next version. William
diff --git a/lib/dp-packet.h b/lib/dp-packet.h index 9f8991faad52..ece0dc5e54cd 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -53,7 +53,27 @@ enum OVS_PACKED_ENUM dp_packet_source { enum dp_packet_offload_mask { DP_PACKET_OL_RSS_HASH_MASK = 0x1, /* Is the 'rss_hash' valid? */ DP_PACKET_OL_FLOW_MARK_MASK = 0x2, /* Is the 'flow_mark' valid? */ + DP_PACKET_OL_RX_L4_CKSUM_BAD = 0x4, /* Bad L4 checksum in the packet. */ + DP_PACKET_OL_RX_IP_CKSUM_BAD = 0x8, /* Bad IP checksum in the packet. */ + DP_PACKET_OL_RX_L4_CKSUM_GOOD = 0x10, /* Valid L4 checksum in the packet. + */ + DP_PACKET_OL_RX_IP_CKSUM_GOOD = 0x20, /* Valid IP checksum in the packet. + */ + DP_PACKET_OL_TX_TCP_SEG = 0x40, /* TCP Segmentation Offload. */ + DP_PACKET_OL_TX_IPV4 = 0x80, /* Offloaded packet is IPv4. */ + DP_PACKET_OL_TX_IPV6 = 0x100, /* Offloaded packet is IPv6. */ + DP_PACKET_OL_TX_TCP_CKSUM = 0x200, /* Offload TCP checksum. */ + DP_PACKET_OL_TX_UDP_CKSUM = 0x400, /* Offload UDP checksum. */ + DP_PACKET_OL_TX_SCTP_CKSUM = 0x800, /* Offload SCTP checksum. */ }; + +#define DP_PACKET_OL_TX_L4_MASK (DP_PACKET_OL_TX_TCP_CKSUM | \ + DP_PACKET_OL_TX_UDP_CKSUM | \ + DP_PACKET_OL_TX_SCTP_CKSUM) +#define DP_PACKET_OL_RX_IP_CKSUM_MASK (DP_PACKET_OL_RX_IP_CKSUM_GOOD | \ + DP_PACKET_OL_RX_IP_CKSUM_BAD) +#define DP_PACKET_OL_RX_L4_CKSUM_MASK (DP_PACKET_OL_RX_L4_CKSUM_GOOD | \ + DP_PACKET_OL_RX_L4_CKSUM_BAD) #else /* DPDK mbuf ol_flags that are not really an offload flags. These are mostly * related to mbuf memory layout and OVS should not touch/clear them. */ @@ -739,82 +759,79 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s) b->allocated_ = s; } -/* There are no implementation when not DPDK enabled datapath. */ static inline bool -dp_packet_hwol_is_tso(const struct dp_packet *b OVS_UNUSED) +dp_packet_hwol_is_tso(const struct dp_packet *b) { - return false; + return !!(b->ol_flags & DP_PACKET_OL_TX_TCP_SEG); } -/* There are no implementation when not DPDK enabled datapath. */ static inline bool -dp_packet_hwol_is_ipv4(const struct dp_packet *b OVS_UNUSED) +dp_packet_hwol_is_ipv4(const struct dp_packet *b) { - return false; + return !!(b->ol_flags & DP_PACKET_OL_TX_IPV4); } -/* There are no implementation when not DPDK enabled datapath. */ static inline uint64_t -dp_packet_hwol_l4_mask(const struct dp_packet *b OVS_UNUSED) +dp_packet_hwol_l4_mask(const struct dp_packet *b) { - return 0; + return b->ol_flags & DP_PACKET_OL_TX_L4_MASK; } -/* There are no implementation when not DPDK enabled datapath. */ static inline bool -dp_packet_hwol_l4_is_tcp(const struct dp_packet *b OVS_UNUSED) +dp_packet_hwol_l4_is_tcp(const struct dp_packet *b) { - return false; + return (b->ol_flags & DP_PACKET_OL_TX_L4_MASK) == + DP_PACKET_OL_TX_TCP_CKSUM; } -/* There are no implementation when not DPDK enabled datapath. */ static inline bool -dp_packet_hwol_l4_is_udp(const struct dp_packet *b OVS_UNUSED) +dp_packet_hwol_l4_is_udp(const struct dp_packet *b) { - return false; + return (b->ol_flags & DP_PACKET_OL_TX_L4_MASK) == + DP_PACKET_OL_TX_UDP_CKSUM; } -/* There are no implementation when not DPDK enabled datapath. */ static inline bool -dp_packet_hwol_l4_is_sctp(const struct dp_packet *b OVS_UNUSED) +dp_packet_hwol_l4_is_sctp(const struct dp_packet *b) { - return false; + return (b->ol_flags & DP_PACKET_OL_TX_L4_MASK) == + DP_PACKET_OL_TX_SCTP_CKSUM; } -/* There are no implementation when not DPDK enabled datapath. */ static inline void -dp_packet_hwol_set_tx_ipv4(struct dp_packet *b OVS_UNUSED) +dp_packet_hwol_set_tx_ipv4(struct dp_packet *b) { + b->ol_flags |= DP_PACKET_OL_TX_IPV4; } -/* There are no implementation when not DPDK enabled datapath. */ static inline void -dp_packet_hwol_set_tx_ipv6(struct dp_packet *b OVS_UNUSED) +dp_packet_hwol_set_tx_ipv6(struct dp_packet *b) { + b->ol_flags |= DP_PACKET_OL_TX_IPV6; } -/* There are no implementation when not DPDK enabled datapath. */ static inline void -dp_packet_hwol_set_csum_tcp(struct dp_packet *b OVS_UNUSED) +dp_packet_hwol_set_csum_tcp(struct dp_packet *b) { + b->ol_flags |= DP_PACKET_OL_TX_TCP_CKSUM; } -/* There are no implementation when not DPDK enabled datapath. */ static inline void -dp_packet_hwol_set_csum_udp(struct dp_packet *b OVS_UNUSED) +dp_packet_hwol_set_csum_udp(struct dp_packet *b) { + b->ol_flags |= DP_PACKET_OL_TX_UDP_CKSUM; } -/* There are no implementation when not DPDK enabled datapath. */ static inline void -dp_packet_hwol_set_csum_sctp(struct dp_packet *b OVS_UNUSED) +dp_packet_hwol_set_csum_sctp(struct dp_packet *b) { + b->ol_flags |= DP_PACKET_OL_TX_SCTP_CKSUM; } -/* There are no implementation when not DPDK enabled datapath. */ static inline void -dp_packet_hwol_set_tcp_seg(struct dp_packet *b OVS_UNUSED) +dp_packet_hwol_set_tcp_seg(struct dp_packet *b) { + b->ol_flags |= DP_PACKET_OL_TX_TCP_SEG; } /* Returns the RSS hash of the packet 'p'. Note that the returned value is @@ -845,27 +862,31 @@ dp_packet_reset_offload(struct dp_packet *p) } static inline bool -dp_packet_ip_checksum_valid(const struct dp_packet *p OVS_UNUSED) +dp_packet_ip_checksum_valid(const struct dp_packet *p) { - return false; + return (p->ol_flags & DP_PACKET_OL_RX_IP_CKSUM_MASK) == + DP_PACKET_OL_RX_IP_CKSUM_GOOD; } static inline bool -dp_packet_ip_checksum_bad(const struct dp_packet *p OVS_UNUSED) +dp_packet_ip_checksum_bad(const struct dp_packet *p) { - return false; + return (p->ol_flags & DP_PACKET_OL_RX_IP_CKSUM_MASK) == + DP_PACKET_OL_RX_IP_CKSUM_BAD; } static inline bool -dp_packet_l4_checksum_valid(const struct dp_packet *p OVS_UNUSED) +dp_packet_l4_checksum_valid(const struct dp_packet *p) { - return false; + return (p->ol_flags & DP_PACKET_OL_RX_L4_CKSUM_MASK) == + DP_PACKET_OL_RX_L4_CKSUM_GOOD; } static inline bool -dp_packet_l4_checksum_bad(const struct dp_packet *p OVS_UNUSED) +dp_packet_l4_checksum_bad(const struct dp_packet *p) { - return false; + return (p->ol_flags & DP_PACKET_OL_RX_L4_CKSUM_MASK) == + DP_PACKET_OL_RX_L4_CKSUM_BAD; } static inline bool diff --git a/lib/userspace-tso.c b/lib/userspace-tso.c index 6a4a0149b7f5..f843c2a763ce 100644 --- a/lib/userspace-tso.c +++ b/lib/userspace-tso.c @@ -34,13 +34,8 @@ userspace_tso_init(const struct smap *ovs_other_config) static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; if (ovsthread_once_start(&once)) { -#ifdef DPDK_NETDEV VLOG_INFO("Userspace TCP Segmentation Offloading support enabled"); userspace_tso = true; -#else - VLOG_WARN("Userspace TCP Segmentation Offloading can not be enabled" - "since OVS is built without DPDK support."); -#endif ovsthread_once_done(&once); } } diff --git a/tests/.gitignore b/tests/.gitignore index 99fdf70d58cd..45b4f67b2a43 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -24,6 +24,9 @@ /system-userspace-testsuite /system-userspace-testsuite.dir/ /system-userspace-testsuite.log +/system-tso-testsuite +/system-tso-testsuite.dir/ +/system-tso-testsuite.log /system-offloads-testsuite /system-offloads-testsuite.dir/ /system-offloads-testsuite.log diff --git a/tests/automake.mk b/tests/automake.mk index 81eb2a9b8222..66859d5377c3 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -4,6 +4,7 @@ EXTRA_DIST += \ $(SYSTEM_TESTSUITE_AT) \ $(SYSTEM_KMOD_TESTSUITE_AT) \ $(SYSTEM_USERSPACE_TESTSUITE_AT) \ + $(SYSTEM_TSO_TESTSUITE_AT) \ $(SYSTEM_AFXDP_TESTSUITE_AT) \ $(SYSTEM_OFFLOADS_TESTSUITE_AT) \ $(SYSTEM_DPDK_TESTSUITE_AT) \ @@ -11,6 +12,7 @@ EXTRA_DIST += \ $(TESTSUITE) \ $(SYSTEM_KMOD_TESTSUITE) \ $(SYSTEM_USERSPACE_TESTSUITE) \ + $(SYSTEM_TSO_TESTSUITE) \ $(SYSTEM_AFXDP_TESTSUITE) \ $(SYSTEM_OFFLOADS_TESTSUITE) \ $(SYSTEM_DPDK_TESTSUITE) \ @@ -154,6 +156,10 @@ SYSTEM_USERSPACE_TESTSUITE_AT = \ tests/system-userspace-macros.at \ tests/system-userspace-packet-type-aware.at +SYSTEM_TSO_TESTSUITE_AT = \ + tests/system-tso-testsuite.at \ + tests/system-tso-macros.at + SYSTEM_AFXDP_TESTSUITE_AT = \ tests/system-userspace-macros.at \ tests/system-afxdp-testsuite.at \ @@ -183,6 +189,7 @@ TESTSUITE = $(srcdir)/tests/testsuite TESTSUITE_PATCH = $(srcdir)/tests/testsuite.patch SYSTEM_KMOD_TESTSUITE = $(srcdir)/tests/system-kmod-testsuite SYSTEM_USERSPACE_TESTSUITE = $(srcdir)/tests/system-userspace-testsuite +SYSTEM_TSO_TESTSUITE = $(srcdir)/tests/system-tso-testsuite SYSTEM_AFXDP_TESTSUITE = $(srcdir)/tests/system-afxdp-testsuite SYSTEM_OFFLOADS_TESTSUITE = $(srcdir)/tests/system-offloads-testsuite SYSTEM_DPDK_TESTSUITE = $(srcdir)/tests/system-dpdk-testsuite @@ -296,6 +303,12 @@ check-offloads-valgrind: all $(valgrind_wrappers) $(check_DATA) @echo '----------------------------------------------------------------------' @echo 'Valgrind output can be found in tests/system-offloads-testsuite.dir/*/valgrind.*' @echo '----------------------------------------------------------------------' +check-tso-valgrind: all $(valgrind_wrappers) $(check_DATA) + $(SHELL) '$(SYSTEM_TSO_TESTSUITE)' -C tests VALGRIND='$(VALGRIND)' AUTOTEST_PATH='tests/valgrind:$(AUTOTEST_PATH)' -d $(TESTSUITEFLAGS) -j1 + @echo + @echo '----------------------------------------------------------------------' + @echo 'Valgrind output can be found in tests/system-tso-testsuite.dir/*/valgrind.*' + @echo '----------------------------------------------------------------------' check-helgrind: all $(valgrind_wrappers) $(check_DATA) -$(SHELL) '$(TESTSUITE)' -C tests CHECK_VALGRIND=true VALGRIND='$(HELGRIND)' AUTOTEST_PATH='tests/valgrind:$(AUTOTEST_PATH)' -d $(TESTSUITEFLAGS) @@ -326,6 +339,10 @@ check-system-userspace: all set $(SHELL) '$(SYSTEM_USERSPACE_TESTSUITE)' -C tests AUTOTEST_PATH='$(AUTOTEST_PATH)'; \ "$$@" $(TESTSUITEFLAGS) -j1 || (test X'$(RECHECK)' = Xyes && "$$@" --recheck) +check-system-tso: all + set $(SHELL) '$(SYSTEM_TSO_TESTSUITE)' -C tests AUTOTEST_PATH='$(AUTOTEST_PATH)'; \ + "$$@" $(TESTSUITEFLAGS) -j1 || (test X'$(RECHECK)' = Xyes && "$$@" --recheck) + check-afxdp: all set $(SHELL) '$(SYSTEM_AFXDP_TESTSUITE)' -C tests AUTOTEST_PATH='$(AUTOTEST_PATH)' $(TESTSUITEFLAGS) -j1; \ "$$@" || (test X'$(RECHECK)' = Xyes && "$$@" --recheck) @@ -367,6 +384,10 @@ $(SYSTEM_USERSPACE_TESTSUITE): package.m4 $(SYSTEM_TESTSUITE_AT) $(SYSTEM_USERSP $(AM_V_GEN)$(AUTOTEST) -I '$(srcdir)' -o $@.tmp $@.at $(AM_V_at)mv $@.tmp $@ +$(SYSTEM_TSO_TESTSUITE): package.m4 $(SYSTEM_TESTSUITE_AT) $(SYSTEM_TSO_TESTSUITE_AT) $(COMMON_MACROS_AT) + $(AM_V_GEN)$(AUTOTEST) -I '$(srcdir)' -o $@.tmp $@.at + $(AM_V_at)mv $@.tmp $@ + $(SYSTEM_AFXDP_TESTSUITE): package.m4 $(SYSTEM_TESTSUITE_AT) $(SYSTEM_AFXDP_TESTSUITE_AT) $(COMMON_MACROS_AT) $(AM_V_GEN)$(AUTOTEST) -I '$(srcdir)' -o $@.tmp $@.at $(AM_V_at)mv $@.tmp $@ diff --git a/tests/system-tso-macros.at b/tests/system-tso-macros.at new file mode 100644 index 000000000000..406334f3e081 --- /dev/null +++ b/tests/system-tso-macros.at @@ -0,0 +1,31 @@ +# _ADD_BR([name]) +# +# Expands into the proper ovs-vsctl commands to create a bridge with the +# appropriate type and properties +m4_define([_ADD_BR], [[add-br $1 -- set Bridge $1 datapath_type="netdev" protocols=OpenFlow10,OpenFlow11,OpenFlow12,OpenFlow13,OpenFlow14,OpenFlow15 fail-mode=secure ]]) + +# OVS_TRAFFIC_VSWITCHD_START([vsctl-args], [vsctl-output], [=override]) +# +# Creates a database and starts ovsdb-server, starts ovs-vswitchd +# connected to that database, calls ovs-vsctl to create a bridge named +# br0 with predictable settings, passing 'vsctl-args' as additional +# commands to ovs-vsctl. If 'vsctl-args' causes ovs-vsctl to provide +# output (e.g. because it includes "create" commands) then 'vsctl-output' +# specifies the expected output after filtering through uuidfilt. +m4_define([OVS_TRAFFIC_VSWITCHD_START], + [ + OVS_WAIT_WHILE([ip link show ovs-netdev]) + _OVS_VSWITCHD_START([--disable-system]) + dnl Add bridges, ports, etc. + OVS_WAIT_WHILE([ip link show br0]) + AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:userspace-tso-enable=true]) + AT_CHECK([ovs-vsctl -- _ADD_BR([br0]) -- $1 m4_if([$2], [], [], [| uuidfilt])], [0], [$2]) +]) + +# CONFIGURE_VETH_OFFLOADS([VETH]) +# +# Enable TCP segmentation offload and scatter-gather for veths. +m4_define([CONFIGURE_VETH_OFFLOADS], + [AT_CHECK([ethtool -K $1 sg on], [0], [ignore], [ignore])] + [AT_CHECK([ethtool -K $1 tso on], [0], [ignore], [ignore])] +) diff --git a/tests/system-tso-testsuite.at b/tests/system-tso-testsuite.at new file mode 100644 index 000000000000..99d748006a86 --- /dev/null +++ b/tests/system-tso-testsuite.at @@ -0,0 +1,26 @@ +AT_INIT + +AT_COPYRIGHT([Copyright (c) 2020 VMware, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.]) + +m4_ifdef([AT_COLOR_TESTS], [AT_COLOR_TESTS]) + +m4_include([tests/ovs-macros.at]) +m4_include([tests/ovsdb-macros.at]) +m4_include([tests/ofproto-macros.at]) +m4_include([tests/system-common-macros.at]) +m4_include([tests/system-userspace-macros.at]) +m4_include([tests/system-tso-macros.at]) + +m4_include([tests/system-traffic.at])
This patch enables TSO support for non-DPDK use cases, and also add check-system-tso testsuite. Before TSO, we have to disable checksum offload, allowing the kernel to calculate the TCP/UDP packet checsum. With TSO, we can skip the checksum validation by enabling checksum offload, and with large packet size, we see better performance. Consider container to container use cases: iperf3 -c (ns0) -> veth peer -> OVS -> veth peer -> iperf3 -s (ns1) And I got around 6Gbps, similar to TSO with DPDK-enabled. Signed-off-by: William Tu <u9012063@gmail.com> --- v3: - fix comments and some coding style - add valgrind check - travis: https://travis-ci.org/williamtu/ovs-travis/builds/660394007 v2: - add make check-system-tso test - combine logging for dpdk and non-dpdk - I'm surprised that most of the test cases passed. This is due to few tests using tcp/udp, so it does not trigger TSO. I saw only geneve/vxlan fails randomly, maybe we can check it later. --- lib/dp-packet.h | 97 ++++++++++++++++++++++++++----------------- lib/userspace-tso.c | 5 --- tests/.gitignore | 3 ++ tests/automake.mk | 21 ++++++++++ tests/system-tso-macros.at | 31 ++++++++++++++ tests/system-tso-testsuite.at | 26 ++++++++++++ 6 files changed, 140 insertions(+), 43 deletions(-) create mode 100644 tests/system-tso-macros.at create mode 100644 tests/system-tso-testsuite.at