Message ID | 1582824604-113843-1-git-send-email-u9012063@gmail.com |
---|---|
State | Changes Requested |
Headers | show |
Series | [ovs-dev,PATCHv2] netdev-afxdp: Add interrupt mode netdev class. | expand |
On 2/27/20 6:30 PM, William Tu wrote: > The patch adds a new netdev class 'afxdp-nonpmd' to enable afxdp > interrupt mode. This is similar to 'type=afxdp', except that the > is_pmd field is set to false. As a result, the packet processing > is handled by main thread, not pmd thread. This avoids burning > the CPU to always 100% when there is no traffic. > > Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/655885506 > Signed-off-by: William Tu <u9012063@gmail.com> > --- > NEWS | 3 +++ > lib/netdev-afxdp.c | 20 +++++++++++++++++++- > lib/netdev-linux.c | 20 ++++++++++++++++++++ > lib/netdev-provider.h | 1 + > lib/netdev.c | 1 + > tests/system-afxdp.at | 23 +++++++++++++++++++++++ > 6 files changed, 67 insertions(+), 1 deletion(-) > > diff --git a/NEWS b/NEWS > index f62ef1f47ea8..594c55dc11d6 100644 > --- a/NEWS > +++ b/NEWS > @@ -4,6 +4,9 @@ Post-v2.13.0 > - OpenFlow: > * The OpenFlow ofp_desc/serial_num may now be configured by setting the > value of other-config:dp-sn in the Bridge table. > + - AF_XDP: > + * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles > + by enabling interrupt mode. > > > v2.13.0 - 14 Feb 2020 > diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c > index 482400d8d135..cd2c7c381139 100644 > --- a/lib/netdev-afxdp.c > +++ b/lib/netdev-afxdp.c > @@ -169,6 +169,12 @@ struct netdev_afxdp_tx_lock { > ); > }; > > +static int nonpmd_cnt; /* Number of afxdp netdevs in non-pmd mode. */ > +static bool > +netdev_is_afxdp_nonpmd(struct netdev *netdev) { > + return netdev_get_class(netdev) == &netdev_afxdp_nonpmd_class; > +} > + > #ifdef HAVE_XDP_NEED_WAKEUP > static inline void > xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem, > @@ -1115,7 +1121,10 @@ netdev_afxdp_batch_send(struct netdev *netdev, int qid, > struct netdev_linux *dev; > int ret; > > - if (concurrent_txq) { > + /* Lock is required when mixing afxdp pmd and nonpmd mode. > + * ex: one device is created 'afxdp', the other is 'afxdp-nonpmd'. > + */ > + if (concurrent_txq || (nonpmd_cnt != 0)) { I'm confused about this change. Are you expecting same device being opened twice with different netdev type? Database should not allow this. > dev = netdev_linux_cast(netdev); > qid = qid % netdev_n_txq(netdev); > > @@ -1159,6 +1168,7 @@ libbpf_print(enum libbpf_print_level level, > int netdev_afxdp_init(void) > { > libbpf_set_print(libbpf_print); > + nonpmd_cnt = 0; > return 0; > } > > @@ -1188,6 +1198,10 @@ netdev_afxdp_construct(struct netdev *netdev) > dev->tx_locks = NULL; > > netdev_request_reconfigure(netdev); > + > + if (netdev_is_afxdp_nonpmd(netdev)) { > + nonpmd_cnt++; > + } > return 0; > } > > @@ -1208,6 +1222,10 @@ netdev_afxdp_destruct(struct netdev *netdev) > > xsk_destroy_all(netdev); > ovs_mutex_destroy(&dev->mutex); > + > + if (netdev_is_afxdp_nonpmd(netdev)) { > + nonpmd_cnt--; > + } > } > > int > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c > index c6f3d27409b6..75b3b55d3cad 100644 > --- a/lib/netdev-linux.c > +++ b/lib/netdev-linux.c > @@ -3605,6 +3605,26 @@ const struct netdev_class netdev_afxdp_class = { > .rxq_destruct = netdev_afxdp_rxq_destruct, > .rxq_recv = netdev_afxdp_rxq_recv, > }; > + > +const struct netdev_class netdev_afxdp_nonpmd_class = { > + NETDEV_LINUX_CLASS_COMMON, > + .type = "afxdp-nonpmd", > + .is_pmd = false, > + .init = netdev_afxdp_init, > + .construct = netdev_afxdp_construct, > + .destruct = netdev_afxdp_destruct, > + .get_stats = netdev_afxdp_get_stats, > + .get_custom_stats = netdev_afxdp_get_custom_stats, > + .get_status = netdev_linux_get_status, > + .set_config = netdev_afxdp_set_config, > + .get_config = netdev_afxdp_get_config, > + .reconfigure = netdev_afxdp_reconfigure, > + .get_numa_id = netdev_linux_get_numa_id, > + .send = netdev_afxdp_batch_send, > + .rxq_construct = netdev_afxdp_rxq_construct, > + .rxq_destruct = netdev_afxdp_rxq_destruct, > + .rxq_recv = netdev_afxdp_rxq_recv, > +}; > #endif > > > diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h > index 22f4cde3337a..06c1d98a0b07 100644 > --- a/lib/netdev-provider.h > +++ b/lib/netdev-provider.h > @@ -848,6 +848,7 @@ extern const struct netdev_class netdev_tap_class; > > #ifdef HAVE_AF_XDP > extern const struct netdev_class netdev_afxdp_class; > +extern const struct netdev_class netdev_afxdp_nonpmd_class; > #endif > #ifdef __cplusplus > } > diff --git a/lib/netdev.c b/lib/netdev.c > index f95b19af4da0..6d9723ebc3c1 100644 > --- a/lib/netdev.c > +++ b/lib/netdev.c > @@ -154,6 +154,7 @@ netdev_initialize(void) > netdev_register_flow_api_provider(&netdev_offload_tc); > #ifdef HAVE_AF_XDP > netdev_register_provider(&netdev_afxdp_class); > + netdev_register_provider(&netdev_afxdp_nonpmd_class); > #endif > #endif > #if defined(__FreeBSD__) || defined(__NetBSD__) > diff --git a/tests/system-afxdp.at b/tests/system-afxdp.at > index e4451624f882..0d09906fb6c8 100644 > --- a/tests/system-afxdp.at > +++ b/tests/system-afxdp.at > @@ -22,3 +22,26 @@ AT_CHECK([grep "ovs-p0: could not set configuration" ovs-vswitchd.log | wc -l], > OVS_TRAFFIC_VSWITCHD_STOP(["/ovs-p0: Too big 'n_rxq'/d > /ovs-p0: could not set configuration/d"]) > AT_CLEANUP > + > + > +AT_SETUP([AF_XDP - ping between pmd and non-pmd ports]) > +AT_KEYWORDS([afxdp nonpmd]) > +OVS_TRAFFIC_VSWITCHD_START() > + > +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"]) > + > +ADD_NAMESPACES(at_ns0, at_ns1) > +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") > +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24") > + > +AT_CHECK([ovs-vsctl del-port ovs-p0]) > +AT_CHECK([ovs-vsctl add-port br0 ovs-p0 -- \ > + set interface ovs-p0 type=afxdp-nonpmd options:n_rxq=1], > + [0], [], [stderr]) > + > +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > +]) > + > +OVS_TRAFFIC_VSWITCHD_STOP > +AT_CLEANUP >
On Thu, Feb 27, 2020 at 9:42 AM Ilya Maximets <i.maximets@ovn.org> wrote: > > On 2/27/20 6:30 PM, William Tu wrote: > > The patch adds a new netdev class 'afxdp-nonpmd' to enable afxdp > > interrupt mode. This is similar to 'type=afxdp', except that the > > is_pmd field is set to false. As a result, the packet processing > > is handled by main thread, not pmd thread. This avoids burning > > the CPU to always 100% when there is no traffic. > > > > Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/655885506 > > Signed-off-by: William Tu <u9012063@gmail.com> > > --- > > NEWS | 3 +++ > > lib/netdev-afxdp.c | 20 +++++++++++++++++++- > > lib/netdev-linux.c | 20 ++++++++++++++++++++ > > lib/netdev-provider.h | 1 + > > lib/netdev.c | 1 + > > tests/system-afxdp.at | 23 +++++++++++++++++++++++ > > 6 files changed, 67 insertions(+), 1 deletion(-) > > > > diff --git a/NEWS b/NEWS > > index f62ef1f47ea8..594c55dc11d6 100644 > > --- a/NEWS > > +++ b/NEWS > > @@ -4,6 +4,9 @@ Post-v2.13.0 > > - OpenFlow: > > * The OpenFlow ofp_desc/serial_num may now be configured by setting the > > value of other-config:dp-sn in the Bridge table. > > + - AF_XDP: > > + * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles > > + by enabling interrupt mode. > > > > > > v2.13.0 - 14 Feb 2020 > > diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c > > index 482400d8d135..cd2c7c381139 100644 > > --- a/lib/netdev-afxdp.c > > +++ b/lib/netdev-afxdp.c > > @@ -169,6 +169,12 @@ struct netdev_afxdp_tx_lock { > > ); > > }; > > > > +static int nonpmd_cnt; /* Number of afxdp netdevs in non-pmd mode. */ > > +static bool > > +netdev_is_afxdp_nonpmd(struct netdev *netdev) { > > + return netdev_get_class(netdev) == &netdev_afxdp_nonpmd_class; > > +} > > + > > #ifdef HAVE_XDP_NEED_WAKEUP > > static inline void > > xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem, > > @@ -1115,7 +1121,10 @@ netdev_afxdp_batch_send(struct netdev *netdev, int qid, > > struct netdev_linux *dev; > > int ret; > > > > - if (concurrent_txq) { > > + /* Lock is required when mixing afxdp pmd and nonpmd mode. > > + * ex: one device is created 'afxdp', the other is 'afxdp-nonpmd'. > > + */ > > + if (concurrent_txq || (nonpmd_cnt != 0)) { > > I'm confused about this change. Are you expecting same device being > opened twice with different netdev type? Database should not allow this. Not the same device. For example, dev1 opened as 'afxdp', and dev2 opened as 'afxdp-nonpmd'. When dev2 receives a packet and send to dev1, the main thread used by dev2 is calling the send(), while it is possible that dev2's pmd thread is also calling send(). So need a lock here. William
On 2/27/20 6:51 PM, William Tu wrote: > On Thu, Feb 27, 2020 at 9:42 AM Ilya Maximets <i.maximets@ovn.org> wrote: >> >> On 2/27/20 6:30 PM, William Tu wrote: >>> The patch adds a new netdev class 'afxdp-nonpmd' to enable afxdp >>> interrupt mode. This is similar to 'type=afxdp', except that the >>> is_pmd field is set to false. As a result, the packet processing >>> is handled by main thread, not pmd thread. This avoids burning >>> the CPU to always 100% when there is no traffic. >>> >>> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/655885506 >>> Signed-off-by: William Tu <u9012063@gmail.com> >>> --- >>> NEWS | 3 +++ >>> lib/netdev-afxdp.c | 20 +++++++++++++++++++- >>> lib/netdev-linux.c | 20 ++++++++++++++++++++ >>> lib/netdev-provider.h | 1 + >>> lib/netdev.c | 1 + >>> tests/system-afxdp.at | 23 +++++++++++++++++++++++ >>> 6 files changed, 67 insertions(+), 1 deletion(-) >>> >>> diff --git a/NEWS b/NEWS >>> index f62ef1f47ea8..594c55dc11d6 100644 >>> --- a/NEWS >>> +++ b/NEWS >>> @@ -4,6 +4,9 @@ Post-v2.13.0 >>> - OpenFlow: >>> * The OpenFlow ofp_desc/serial_num may now be configured by setting the >>> value of other-config:dp-sn in the Bridge table. >>> + - AF_XDP: >>> + * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles >>> + by enabling interrupt mode. >>> >>> >>> v2.13.0 - 14 Feb 2020 >>> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c >>> index 482400d8d135..cd2c7c381139 100644 >>> --- a/lib/netdev-afxdp.c >>> +++ b/lib/netdev-afxdp.c >>> @@ -169,6 +169,12 @@ struct netdev_afxdp_tx_lock { >>> ); >>> }; >>> >>> +static int nonpmd_cnt; /* Number of afxdp netdevs in non-pmd mode. */ >>> +static bool >>> +netdev_is_afxdp_nonpmd(struct netdev *netdev) { >>> + return netdev_get_class(netdev) == &netdev_afxdp_nonpmd_class; >>> +} >>> + >>> #ifdef HAVE_XDP_NEED_WAKEUP >>> static inline void >>> xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem, >>> @@ -1115,7 +1121,10 @@ netdev_afxdp_batch_send(struct netdev *netdev, int qid, >>> struct netdev_linux *dev; >>> int ret; >>> >>> - if (concurrent_txq) { >>> + /* Lock is required when mixing afxdp pmd and nonpmd mode. >>> + * ex: one device is created 'afxdp', the other is 'afxdp-nonpmd'. >>> + */ >>> + if (concurrent_txq || (nonpmd_cnt != 0)) { >> >> I'm confused about this change. Are you expecting same device being >> opened twice with different netdev type? Database should not allow this. > > Not the same device. > > For example, dev1 opened as 'afxdp', and dev2 opened as 'afxdp-nonpmd'. > When dev2 receives a packet and send to dev1, the main thread used by dev2 > is calling the send(), while it is possible that dev2's pmd thread is > also calling > send(). So need a lock here. But they will send to different tx queues.
On Thu, Feb 27, 2020 at 9:54 AM Ilya Maximets <i.maximets@ovn.org> wrote: > > On 2/27/20 6:51 PM, William Tu wrote: > > On Thu, Feb 27, 2020 at 9:42 AM Ilya Maximets <i.maximets@ovn.org> wrote: > >> > >> On 2/27/20 6:30 PM, William Tu wrote: > >>> The patch adds a new netdev class 'afxdp-nonpmd' to enable afxdp > >>> interrupt mode. This is similar to 'type=afxdp', except that the > >>> is_pmd field is set to false. As a result, the packet processing > >>> is handled by main thread, not pmd thread. This avoids burning > >>> the CPU to always 100% when there is no traffic. > >>> > >>> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/655885506 > >>> Signed-off-by: William Tu <u9012063@gmail.com> > >>> --- > >>> NEWS | 3 +++ > >>> lib/netdev-afxdp.c | 20 +++++++++++++++++++- > >>> lib/netdev-linux.c | 20 ++++++++++++++++++++ > >>> lib/netdev-provider.h | 1 + > >>> lib/netdev.c | 1 + > >>> tests/system-afxdp.at | 23 +++++++++++++++++++++++ > >>> 6 files changed, 67 insertions(+), 1 deletion(-) > >>> > >>> diff --git a/NEWS b/NEWS > >>> index f62ef1f47ea8..594c55dc11d6 100644 > >>> --- a/NEWS > >>> +++ b/NEWS > >>> @@ -4,6 +4,9 @@ Post-v2.13.0 > >>> - OpenFlow: > >>> * The OpenFlow ofp_desc/serial_num may now be configured by setting the > >>> value of other-config:dp-sn in the Bridge table. > >>> + - AF_XDP: > >>> + * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles > >>> + by enabling interrupt mode. > >>> > >>> > >>> v2.13.0 - 14 Feb 2020 > >>> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c > >>> index 482400d8d135..cd2c7c381139 100644 > >>> --- a/lib/netdev-afxdp.c > >>> +++ b/lib/netdev-afxdp.c > >>> @@ -169,6 +169,12 @@ struct netdev_afxdp_tx_lock { > >>> ); > >>> }; > >>> > >>> +static int nonpmd_cnt; /* Number of afxdp netdevs in non-pmd mode. */ > >>> +static bool > >>> +netdev_is_afxdp_nonpmd(struct netdev *netdev) { > >>> + return netdev_get_class(netdev) == &netdev_afxdp_nonpmd_class; > >>> +} > >>> + > >>> #ifdef HAVE_XDP_NEED_WAKEUP > >>> static inline void > >>> xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem, > >>> @@ -1115,7 +1121,10 @@ netdev_afxdp_batch_send(struct netdev *netdev, int qid, > >>> struct netdev_linux *dev; > >>> int ret; > >>> > >>> - if (concurrent_txq) { > >>> + /* Lock is required when mixing afxdp pmd and nonpmd mode. > >>> + * ex: one device is created 'afxdp', the other is 'afxdp-nonpmd'. > >>> + */ > >>> + if (concurrent_txq || (nonpmd_cnt != 0)) { > >> > >> I'm confused about this change. Are you expecting same device being > >> opened twice with different netdev type? Database should not allow this. > > > > Not the same device. > > > > For example, dev1 opened as 'afxdp', and dev2 opened as 'afxdp-nonpmd'. > > When dev2 receives a packet and send to dev1, the main thread used by dev2 > > is calling the send(), while it is possible that dev2's pmd thread is > > also calling > > send(). So need a lock here. > > But they will send to different tx queues. OK, I think you are talking about the XPS feature (dynamic txqs). I thought XPS can only work when all between pmd threads. So adding a lock here. The patch currently doesn't work without the lock. Let me investigate more. Thanks William
On Thu, Feb 27, 2020 at 11:10 AM William Tu <u9012063@gmail.com> wrote: > > On Thu, Feb 27, 2020 at 9:54 AM Ilya Maximets <i.maximets@ovn.org> wrote: > > > > On 2/27/20 6:51 PM, William Tu wrote: > > > On Thu, Feb 27, 2020 at 9:42 AM Ilya Maximets <i.maximets@ovn.org> wrote: > > >> > > >> On 2/27/20 6:30 PM, William Tu wrote: > > >>> The patch adds a new netdev class 'afxdp-nonpmd' to enable afxdp > > >>> interrupt mode. This is similar to 'type=afxdp', except that the > > >>> is_pmd field is set to false. As a result, the packet processing > > >>> is handled by main thread, not pmd thread. This avoids burning > > >>> the CPU to always 100% when there is no traffic. > > >>> > > >>> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/655885506 > > >>> Signed-off-by: William Tu <u9012063@gmail.com> > > >>> --- > > >>> NEWS | 3 +++ > > >>> lib/netdev-afxdp.c | 20 +++++++++++++++++++- > > >>> lib/netdev-linux.c | 20 ++++++++++++++++++++ > > >>> lib/netdev-provider.h | 1 + > > >>> lib/netdev.c | 1 + > > >>> tests/system-afxdp.at | 23 +++++++++++++++++++++++ > > >>> 6 files changed, 67 insertions(+), 1 deletion(-) > > >>> > > >>> diff --git a/NEWS b/NEWS > > >>> index f62ef1f47ea8..594c55dc11d6 100644 > > >>> --- a/NEWS > > >>> +++ b/NEWS > > >>> @@ -4,6 +4,9 @@ Post-v2.13.0 > > >>> - OpenFlow: > > >>> * The OpenFlow ofp_desc/serial_num may now be configured by setting the > > >>> value of other-config:dp-sn in the Bridge table. > > >>> + - AF_XDP: > > >>> + * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles > > >>> + by enabling interrupt mode. > > >>> > > >>> > > >>> v2.13.0 - 14 Feb 2020 > > >>> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c > > >>> index 482400d8d135..cd2c7c381139 100644 > > >>> --- a/lib/netdev-afxdp.c > > >>> +++ b/lib/netdev-afxdp.c > > >>> @@ -169,6 +169,12 @@ struct netdev_afxdp_tx_lock { > > >>> ); > > >>> }; > > >>> > > >>> +static int nonpmd_cnt; /* Number of afxdp netdevs in non-pmd mode. */ > > >>> +static bool > > >>> +netdev_is_afxdp_nonpmd(struct netdev *netdev) { > > >>> + return netdev_get_class(netdev) == &netdev_afxdp_nonpmd_class; > > >>> +} > > >>> + > > >>> #ifdef HAVE_XDP_NEED_WAKEUP > > >>> static inline void > > >>> xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem, > > >>> @@ -1115,7 +1121,10 @@ netdev_afxdp_batch_send(struct netdev *netdev, int qid, > > >>> struct netdev_linux *dev; > > >>> int ret; > > >>> > > >>> - if (concurrent_txq) { > > >>> + /* Lock is required when mixing afxdp pmd and nonpmd mode. > > >>> + * ex: one device is created 'afxdp', the other is 'afxdp-nonpmd'. > > >>> + */ > > >>> + if (concurrent_txq || (nonpmd_cnt != 0)) { > > >> > > >> I'm confused about this change. Are you expecting same device being > > >> opened twice with different netdev type? Database should not allow this. > > > > > > Not the same device. > > > > > > For example, dev1 opened as 'afxdp', and dev2 opened as 'afxdp-nonpmd'. > > > When dev2 receives a packet and send to dev1, the main thread used by dev2 > > > is calling the send(), while it is possible that dev2's pmd thread is > > > also calling > > > send(). So need a lock here. > > > > But they will send to different tx queues. > > OK, I think you are talking about the XPS feature (dynamic txqs). > I thought XPS can only work when all between pmd threads. > So adding a lock here. > The patch currently doesn't work without the lock. Let me investigate more. > More details. On my test using veth (only have 1 rxq 1 txq) Somehow the main thread is assigned to use queue id = 2, which causes segfault. I will send v3 patch. Thanks William
On 2/27/20 8:52 PM, William Tu wrote: > On Thu, Feb 27, 2020 at 11:10 AM William Tu <u9012063@gmail.com> wrote: >> >> On Thu, Feb 27, 2020 at 9:54 AM Ilya Maximets <i.maximets@ovn.org> wrote: >>> >>> On 2/27/20 6:51 PM, William Tu wrote: >>>> On Thu, Feb 27, 2020 at 9:42 AM Ilya Maximets <i.maximets@ovn.org> wrote: >>>>> >>>>> On 2/27/20 6:30 PM, William Tu wrote: >>>>>> The patch adds a new netdev class 'afxdp-nonpmd' to enable afxdp >>>>>> interrupt mode. This is similar to 'type=afxdp', except that the >>>>>> is_pmd field is set to false. As a result, the packet processing >>>>>> is handled by main thread, not pmd thread. This avoids burning >>>>>> the CPU to always 100% when there is no traffic. >>>>>> >>>>>> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/655885506 >>>>>> Signed-off-by: William Tu <u9012063@gmail.com> >>>>>> --- >>>>>> NEWS | 3 +++ >>>>>> lib/netdev-afxdp.c | 20 +++++++++++++++++++- >>>>>> lib/netdev-linux.c | 20 ++++++++++++++++++++ >>>>>> lib/netdev-provider.h | 1 + >>>>>> lib/netdev.c | 1 + >>>>>> tests/system-afxdp.at | 23 +++++++++++++++++++++++ >>>>>> 6 files changed, 67 insertions(+), 1 deletion(-) >>>>>> >>>>>> diff --git a/NEWS b/NEWS >>>>>> index f62ef1f47ea8..594c55dc11d6 100644 >>>>>> --- a/NEWS >>>>>> +++ b/NEWS >>>>>> @@ -4,6 +4,9 @@ Post-v2.13.0 >>>>>> - OpenFlow: >>>>>> * The OpenFlow ofp_desc/serial_num may now be configured by setting the >>>>>> value of other-config:dp-sn in the Bridge table. >>>>>> + - AF_XDP: >>>>>> + * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles >>>>>> + by enabling interrupt mode. >>>>>> >>>>>> >>>>>> v2.13.0 - 14 Feb 2020 >>>>>> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c >>>>>> index 482400d8d135..cd2c7c381139 100644 >>>>>> --- a/lib/netdev-afxdp.c >>>>>> +++ b/lib/netdev-afxdp.c >>>>>> @@ -169,6 +169,12 @@ struct netdev_afxdp_tx_lock { >>>>>> ); >>>>>> }; >>>>>> >>>>>> +static int nonpmd_cnt; /* Number of afxdp netdevs in non-pmd mode. */ >>>>>> +static bool >>>>>> +netdev_is_afxdp_nonpmd(struct netdev *netdev) { >>>>>> + return netdev_get_class(netdev) == &netdev_afxdp_nonpmd_class; >>>>>> +} >>>>>> + >>>>>> #ifdef HAVE_XDP_NEED_WAKEUP >>>>>> static inline void >>>>>> xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem, >>>>>> @@ -1115,7 +1121,10 @@ netdev_afxdp_batch_send(struct netdev *netdev, int qid, >>>>>> struct netdev_linux *dev; >>>>>> int ret; >>>>>> >>>>>> - if (concurrent_txq) { >>>>>> + /* Lock is required when mixing afxdp pmd and nonpmd mode. >>>>>> + * ex: one device is created 'afxdp', the other is 'afxdp-nonpmd'. >>>>>> + */ >>>>>> + if (concurrent_txq || (nonpmd_cnt != 0)) { >>>>> >>>>> I'm confused about this change. Are you expecting same device being >>>>> opened twice with different netdev type? Database should not allow this. >>>> >>>> Not the same device. >>>> >>>> For example, dev1 opened as 'afxdp', and dev2 opened as 'afxdp-nonpmd'. >>>> When dev2 receives a packet and send to dev1, the main thread used by dev2 >>>> is calling the send(), while it is possible that dev2's pmd thread is >>>> also calling >>>> send(). So need a lock here. >>> >>> But they will send to different tx queues. >> >> OK, I think you are talking about the XPS feature (dynamic txqs). >> I thought XPS can only work when all between pmd threads. >> So adding a lock here. >> The patch currently doesn't work without the lock. Let me investigate more. >> > More details. > On my test using veth (only have 1 rxq 1 txq) > Somehow the main thread is assigned to use queue id = 2, which causes segfault. That is very strange... Could you, please, share your test setup? I'll try to reproduce. Best regards, Ilya Maximets.
On Fri, Feb 28, 2020 at 12:12 AM Ilya Maximets <i.maximets@ovn.org> wrote: > > On 2/27/20 8:52 PM, William Tu wrote: > > On Thu, Feb 27, 2020 at 11:10 AM William Tu <u9012063@gmail.com> wrote: > >> > >> On Thu, Feb 27, 2020 at 9:54 AM Ilya Maximets <i.maximets@ovn.org> wrote: > >>> > >>> On 2/27/20 6:51 PM, William Tu wrote: > >>>> On Thu, Feb 27, 2020 at 9:42 AM Ilya Maximets <i.maximets@ovn.org> wrote: > >>>>> > >>>>> On 2/27/20 6:30 PM, William Tu wrote: > >>>>>> The patch adds a new netdev class 'afxdp-nonpmd' to enable afxdp > >>>>>> interrupt mode. This is similar to 'type=afxdp', except that the > >>>>>> is_pmd field is set to false. As a result, the packet processing > >>>>>> is handled by main thread, not pmd thread. This avoids burning > >>>>>> the CPU to always 100% when there is no traffic. > >>>>>> > >>>>>> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/655885506 > >>>>>> Signed-off-by: William Tu <u9012063@gmail.com> > >>>>>> --- > >>>>>> NEWS | 3 +++ > >>>>>> lib/netdev-afxdp.c | 20 +++++++++++++++++++- > >>>>>> lib/netdev-linux.c | 20 ++++++++++++++++++++ > >>>>>> lib/netdev-provider.h | 1 + > >>>>>> lib/netdev.c | 1 + > >>>>>> tests/system-afxdp.at | 23 +++++++++++++++++++++++ > >>>>>> 6 files changed, 67 insertions(+), 1 deletion(-) > >>>>>> > >>>>>> diff --git a/NEWS b/NEWS > >>>>>> index f62ef1f47ea8..594c55dc11d6 100644 > >>>>>> --- a/NEWS > >>>>>> +++ b/NEWS > >>>>>> @@ -4,6 +4,9 @@ Post-v2.13.0 > >>>>>> - OpenFlow: > >>>>>> * The OpenFlow ofp_desc/serial_num may now be configured by setting the > >>>>>> value of other-config:dp-sn in the Bridge table. > >>>>>> + - AF_XDP: > >>>>>> + * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles > >>>>>> + by enabling interrupt mode. > >>>>>> > >>>>>> > >>>>>> v2.13.0 - 14 Feb 2020 > >>>>>> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c > >>>>>> index 482400d8d135..cd2c7c381139 100644 > >>>>>> --- a/lib/netdev-afxdp.c > >>>>>> +++ b/lib/netdev-afxdp.c > >>>>>> @@ -169,6 +169,12 @@ struct netdev_afxdp_tx_lock { > >>>>>> ); > >>>>>> }; > >>>>>> > >>>>>> +static int nonpmd_cnt; /* Number of afxdp netdevs in non-pmd mode. */ > >>>>>> +static bool > >>>>>> +netdev_is_afxdp_nonpmd(struct netdev *netdev) { > >>>>>> + return netdev_get_class(netdev) == &netdev_afxdp_nonpmd_class; > >>>>>> +} > >>>>>> + > >>>>>> #ifdef HAVE_XDP_NEED_WAKEUP > >>>>>> static inline void > >>>>>> xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem, > >>>>>> @@ -1115,7 +1121,10 @@ netdev_afxdp_batch_send(struct netdev *netdev, int qid, > >>>>>> struct netdev_linux *dev; > >>>>>> int ret; > >>>>>> > >>>>>> - if (concurrent_txq) { > >>>>>> + /* Lock is required when mixing afxdp pmd and nonpmd mode. > >>>>>> + * ex: one device is created 'afxdp', the other is 'afxdp-nonpmd'. > >>>>>> + */ > >>>>>> + if (concurrent_txq || (nonpmd_cnt != 0)) { > >>>>> > >>>>> I'm confused about this change. Are you expecting same device being > >>>>> opened twice with different netdev type? Database should not allow this. > >>>> > >>>> Not the same device. > >>>> > >>>> For example, dev1 opened as 'afxdp', and dev2 opened as 'afxdp-nonpmd'. > >>>> When dev2 receives a packet and send to dev1, the main thread used by dev2 > >>>> is calling the send(), while it is possible that dev2's pmd thread is > >>>> also calling > >>>> send(). So need a lock here. > >>> > >>> But they will send to different tx queues. > >> > >> OK, I think you are talking about the XPS feature (dynamic txqs). > >> I thought XPS can only work when all between pmd threads. > >> So adding a lock here. > >> The patch currently doesn't work without the lock. Let me investigate more. > >> > > More details. > > On my test using veth (only have 1 rxq 1 txq) > > Somehow the main thread is assigned to use queue id = 2, which causes segfault. > > That is very strange... > Could you, please, share your test setup? I'll try to reproduce. > > Best regards, Ilya Maximets. Sorry, somehow I miss your reply in my mailbox. I simply tested it using two namespaces, one device as afxdp, the other as afxdp-nonpmd. --- # start ovs-vswitchd and ovsdb-server ovs-vsctl -- add-br br0 -- set Bridge br0 datapath_type=netdev ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0xf ip netns add at_ns0 ip link add p0 type veth peer name afxdp-p0 ip link set p0 netns at_ns0 ip link set dev afxdp-p0 up ovs-vsctl add-port br0 afxdp-p0 ovs-vsctl -- set interface afxdp-p0 options:n_rxq=1 type="afxdp-nonpmd" options:xdp-mode=generic ip netns exec at_ns0 sh << NS_EXEC_HEREDOC ip addr add "10.1.1.1/24" dev p0 ip link set dev p0 up NS_EXEC_HEREDOC ip netns add at_ns1 ip link add p1 type veth peer name afxdp-p1 ip link set p1 netns at_ns1 ip link set dev afxdp-p1 up ovs-vsctl add-port br0 afxdp-p1 -- \ set interface afxdp-p1 type="afxdp" options:xdp-mode=generic options:n_rxq=1 ip netns exec at_ns1 sh << NS_EXEC_HEREDOC ip addr add "10.1.1.2/24" dev p1 ip link set dev p1 up NS_EXEC_HEREDOC ip netns exec at_ns0 ping -c 10 -i .2 10.1.1.2 --- It's pretty easy to reproduce the issue, ping will crash the ovs-vswitchd. Regards, William
On 3/4/20 7:09 PM, William Tu wrote: > On Fri, Feb 28, 2020 at 12:12 AM Ilya Maximets <i.maximets@ovn.org> wrote: >> >> On 2/27/20 8:52 PM, William Tu wrote: >>> On Thu, Feb 27, 2020 at 11:10 AM William Tu <u9012063@gmail.com> wrote: >>>> >>>> On Thu, Feb 27, 2020 at 9:54 AM Ilya Maximets <i.maximets@ovn.org> wrote: >>>>> >>>>> On 2/27/20 6:51 PM, William Tu wrote: >>>>>> On Thu, Feb 27, 2020 at 9:42 AM Ilya Maximets <i.maximets@ovn.org> wrote: >>>>>>> >>>>>>> On 2/27/20 6:30 PM, William Tu wrote: >>>>>>>> The patch adds a new netdev class 'afxdp-nonpmd' to enable afxdp >>>>>>>> interrupt mode. This is similar to 'type=afxdp', except that the >>>>>>>> is_pmd field is set to false. As a result, the packet processing >>>>>>>> is handled by main thread, not pmd thread. This avoids burning >>>>>>>> the CPU to always 100% when there is no traffic. >>>>>>>> >>>>>>>> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/655885506 >>>>>>>> Signed-off-by: William Tu <u9012063@gmail.com> >>>>>>>> --- >>>>>>>> NEWS | 3 +++ >>>>>>>> lib/netdev-afxdp.c | 20 +++++++++++++++++++- >>>>>>>> lib/netdev-linux.c | 20 ++++++++++++++++++++ >>>>>>>> lib/netdev-provider.h | 1 + >>>>>>>> lib/netdev.c | 1 + >>>>>>>> tests/system-afxdp.at | 23 +++++++++++++++++++++++ >>>>>>>> 6 files changed, 67 insertions(+), 1 deletion(-) >>>>>>>> >>>>>>>> diff --git a/NEWS b/NEWS >>>>>>>> index f62ef1f47ea8..594c55dc11d6 100644 >>>>>>>> --- a/NEWS >>>>>>>> +++ b/NEWS >>>>>>>> @@ -4,6 +4,9 @@ Post-v2.13.0 >>>>>>>> - OpenFlow: >>>>>>>> * The OpenFlow ofp_desc/serial_num may now be configured by setting the >>>>>>>> value of other-config:dp-sn in the Bridge table. >>>>>>>> + - AF_XDP: >>>>>>>> + * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles >>>>>>>> + by enabling interrupt mode. >>>>>>>> >>>>>>>> >>>>>>>> v2.13.0 - 14 Feb 2020 >>>>>>>> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c >>>>>>>> index 482400d8d135..cd2c7c381139 100644 >>>>>>>> --- a/lib/netdev-afxdp.c >>>>>>>> +++ b/lib/netdev-afxdp.c >>>>>>>> @@ -169,6 +169,12 @@ struct netdev_afxdp_tx_lock { >>>>>>>> ); >>>>>>>> }; >>>>>>>> >>>>>>>> +static int nonpmd_cnt; /* Number of afxdp netdevs in non-pmd mode. */ >>>>>>>> +static bool >>>>>>>> +netdev_is_afxdp_nonpmd(struct netdev *netdev) { >>>>>>>> + return netdev_get_class(netdev) == &netdev_afxdp_nonpmd_class; >>>>>>>> +} >>>>>>>> + >>>>>>>> #ifdef HAVE_XDP_NEED_WAKEUP >>>>>>>> static inline void >>>>>>>> xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem, >>>>>>>> @@ -1115,7 +1121,10 @@ netdev_afxdp_batch_send(struct netdev *netdev, int qid, >>>>>>>> struct netdev_linux *dev; >>>>>>>> int ret; >>>>>>>> >>>>>>>> - if (concurrent_txq) { >>>>>>>> + /* Lock is required when mixing afxdp pmd and nonpmd mode. >>>>>>>> + * ex: one device is created 'afxdp', the other is 'afxdp-nonpmd'. >>>>>>>> + */ >>>>>>>> + if (concurrent_txq || (nonpmd_cnt != 0)) { >>>>>>> >>>>>>> I'm confused about this change. Are you expecting same device being >>>>>>> opened twice with different netdev type? Database should not allow this. >>>>>> >>>>>> Not the same device. >>>>>> >>>>>> For example, dev1 opened as 'afxdp', and dev2 opened as 'afxdp-nonpmd'. >>>>>> When dev2 receives a packet and send to dev1, the main thread used by dev2 >>>>>> is calling the send(), while it is possible that dev2's pmd thread is >>>>>> also calling >>>>>> send(). So need a lock here. >>>>> >>>>> But they will send to different tx queues. >>>> >>>> OK, I think you are talking about the XPS feature (dynamic txqs). >>>> I thought XPS can only work when all between pmd threads. >>>> So adding a lock here. >>>> The patch currently doesn't work without the lock. Let me investigate more. >>>> >>> More details. >>> On my test using veth (only have 1 rxq 1 txq) >>> Somehow the main thread is assigned to use queue id = 2, which causes segfault. >> >> That is very strange... >> Could you, please, share your test setup? I'll try to reproduce. >> >> Best regards, Ilya Maximets. > > Sorry, somehow I miss your reply in my mailbox. > I simply tested it using two namespaces, one device as afxdp, the > other as afxdp-nonpmd. > --- > # start ovs-vswitchd and ovsdb-server > > ovs-vsctl -- add-br br0 -- set Bridge br0 datapath_type=netdev > ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0xf > > ip netns add at_ns0 > ip link add p0 type veth peer name afxdp-p0 > ip link set p0 netns at_ns0 > ip link set dev afxdp-p0 up > ovs-vsctl add-port br0 afxdp-p0 > ovs-vsctl -- set interface afxdp-p0 options:n_rxq=1 > type="afxdp-nonpmd" options:xdp-mode=generic > > ip netns exec at_ns0 sh << NS_EXEC_HEREDOC > ip addr add "10.1.1.1/24" dev p0 > ip link set dev p0 up > NS_EXEC_HEREDOC > > ip netns add at_ns1 > ip link add p1 type veth peer name afxdp-p1 > ip link set p1 netns at_ns1 > ip link set dev afxdp-p1 up > ovs-vsctl add-port br0 afxdp-p1 -- \ > set interface afxdp-p1 type="afxdp" > options:xdp-mode=generic options:n_rxq=1 > > ip netns exec at_ns1 sh << NS_EXEC_HEREDOC > ip addr add "10.1.1.2/24" dev p1 > ip link set dev p1 up > NS_EXEC_HEREDOC > > ip netns exec at_ns0 ping -c 10 -i .2 10.1.1.2 > --- > > It's pretty easy to reproduce the issue, ping will crash the ovs-vswitchd. Thanks. I'll try to reproduce on next week. Best regards, Ilya Maximets.
On 3/6/20 5:11 PM, Ilya Maximets wrote: > On 3/4/20 7:09 PM, William Tu wrote: >> On Fri, Feb 28, 2020 at 12:12 AM Ilya Maximets <i.maximets@ovn.org> wrote: >>> >>> On 2/27/20 8:52 PM, William Tu wrote: >>>> On Thu, Feb 27, 2020 at 11:10 AM William Tu <u9012063@gmail.com> wrote: >>>>> >>>>> On Thu, Feb 27, 2020 at 9:54 AM Ilya Maximets <i.maximets@ovn.org> wrote: >>>>>> >>>>>> On 2/27/20 6:51 PM, William Tu wrote: >>>>>>> On Thu, Feb 27, 2020 at 9:42 AM Ilya Maximets <i.maximets@ovn.org> wrote: >>>>>>>> >>>>>>>> On 2/27/20 6:30 PM, William Tu wrote: >>>>>>>>> The patch adds a new netdev class 'afxdp-nonpmd' to enable afxdp >>>>>>>>> interrupt mode. This is similar to 'type=afxdp', except that the >>>>>>>>> is_pmd field is set to false. As a result, the packet processing >>>>>>>>> is handled by main thread, not pmd thread. This avoids burning >>>>>>>>> the CPU to always 100% when there is no traffic. >>>>>>>>> >>>>>>>>> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/655885506 >>>>>>>>> Signed-off-by: William Tu <u9012063@gmail.com> >>>>>>>>> --- >>>>>>>>> NEWS | 3 +++ >>>>>>>>> lib/netdev-afxdp.c | 20 +++++++++++++++++++- >>>>>>>>> lib/netdev-linux.c | 20 ++++++++++++++++++++ >>>>>>>>> lib/netdev-provider.h | 1 + >>>>>>>>> lib/netdev.c | 1 + >>>>>>>>> tests/system-afxdp.at | 23 +++++++++++++++++++++++ >>>>>>>>> 6 files changed, 67 insertions(+), 1 deletion(-) >>>>>>>>> >>>>>>>>> diff --git a/NEWS b/NEWS >>>>>>>>> index f62ef1f47ea8..594c55dc11d6 100644 >>>>>>>>> --- a/NEWS >>>>>>>>> +++ b/NEWS >>>>>>>>> @@ -4,6 +4,9 @@ Post-v2.13.0 >>>>>>>>> - OpenFlow: >>>>>>>>> * The OpenFlow ofp_desc/serial_num may now be configured by setting the >>>>>>>>> value of other-config:dp-sn in the Bridge table. >>>>>>>>> + - AF_XDP: >>>>>>>>> + * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles >>>>>>>>> + by enabling interrupt mode. >>>>>>>>> >>>>>>>>> >>>>>>>>> v2.13.0 - 14 Feb 2020 >>>>>>>>> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c >>>>>>>>> index 482400d8d135..cd2c7c381139 100644 >>>>>>>>> --- a/lib/netdev-afxdp.c >>>>>>>>> +++ b/lib/netdev-afxdp.c >>>>>>>>> @@ -169,6 +169,12 @@ struct netdev_afxdp_tx_lock { >>>>>>>>> ); >>>>>>>>> }; >>>>>>>>> >>>>>>>>> +static int nonpmd_cnt; /* Number of afxdp netdevs in non-pmd mode. */ >>>>>>>>> +static bool >>>>>>>>> +netdev_is_afxdp_nonpmd(struct netdev *netdev) { >>>>>>>>> + return netdev_get_class(netdev) == &netdev_afxdp_nonpmd_class; >>>>>>>>> +} >>>>>>>>> + >>>>>>>>> #ifdef HAVE_XDP_NEED_WAKEUP >>>>>>>>> static inline void >>>>>>>>> xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem, >>>>>>>>> @@ -1115,7 +1121,10 @@ netdev_afxdp_batch_send(struct netdev *netdev, int qid, >>>>>>>>> struct netdev_linux *dev; >>>>>>>>> int ret; >>>>>>>>> >>>>>>>>> - if (concurrent_txq) { >>>>>>>>> + /* Lock is required when mixing afxdp pmd and nonpmd mode. >>>>>>>>> + * ex: one device is created 'afxdp', the other is 'afxdp-nonpmd'. >>>>>>>>> + */ >>>>>>>>> + if (concurrent_txq || (nonpmd_cnt != 0)) { >>>>>>>> >>>>>>>> I'm confused about this change. Are you expecting same device being >>>>>>>> opened twice with different netdev type? Database should not allow this. >>>>>>> >>>>>>> Not the same device. >>>>>>> >>>>>>> For example, dev1 opened as 'afxdp', and dev2 opened as 'afxdp-nonpmd'. >>>>>>> When dev2 receives a packet and send to dev1, the main thread used by dev2 >>>>>>> is calling the send(), while it is possible that dev2's pmd thread is >>>>>>> also calling >>>>>>> send(). So need a lock here. >>>>>> >>>>>> But they will send to different tx queues. >>>>> >>>>> OK, I think you are talking about the XPS feature (dynamic txqs). >>>>> I thought XPS can only work when all between pmd threads. >>>>> So adding a lock here. >>>>> The patch currently doesn't work without the lock. Let me investigate more. >>>>> >>>> More details. >>>> On my test using veth (only have 1 rxq 1 txq) >>>> Somehow the main thread is assigned to use queue id = 2, which causes segfault. >>> >>> That is very strange... >>> Could you, please, share your test setup? I'll try to reproduce. >>> >>> Best regards, Ilya Maximets. >> >> Sorry, somehow I miss your reply in my mailbox. >> I simply tested it using two namespaces, one device as afxdp, the >> other as afxdp-nonpmd. >> --- >> # start ovs-vswitchd and ovsdb-server >> >> ovs-vsctl -- add-br br0 -- set Bridge br0 datapath_type=netdev >> ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0xf >> >> ip netns add at_ns0 >> ip link add p0 type veth peer name afxdp-p0 >> ip link set p0 netns at_ns0 >> ip link set dev afxdp-p0 up >> ovs-vsctl add-port br0 afxdp-p0 >> ovs-vsctl -- set interface afxdp-p0 options:n_rxq=1 >> type="afxdp-nonpmd" options:xdp-mode=generic >> >> ip netns exec at_ns0 sh << NS_EXEC_HEREDOC >> ip addr add "10.1.1.1/24" dev p0 >> ip link set dev p0 up >> NS_EXEC_HEREDOC >> >> ip netns add at_ns1 >> ip link add p1 type veth peer name afxdp-p1 >> ip link set p1 netns at_ns1 >> ip link set dev afxdp-p1 up >> ovs-vsctl add-port br0 afxdp-p1 -- \ >> set interface afxdp-p1 type="afxdp" >> options:xdp-mode=generic options:n_rxq=1 >> >> ip netns exec at_ns1 sh << NS_EXEC_HEREDOC >> ip addr add "10.1.1.2/24" dev p1 >> ip link set dev p1 up >> NS_EXEC_HEREDOC >> >> ip netns exec at_ns0 ping -c 10 -i .2 10.1.1.2 >> --- >> >> It's pretty easy to reproduce the issue, ping will crash the ovs-vswitchd. > > Thanks. I'll try to reproduce on next week. It took a bit longer, but I reproduced the issue and sent a fix here: https://patchwork.ozlabs.org/patch/1261072/ Best regards, Ilya Maximets.
On Wed, Mar 25, 2020 at 12:53:18AM +0100, Ilya Maximets wrote: > On 3/6/20 5:11 PM, Ilya Maximets wrote: > > On 3/4/20 7:09 PM, William Tu wrote: > >> On Fri, Feb 28, 2020 at 12:12 AM Ilya Maximets <i.maximets@ovn.org> wrote: > >>> > >>> On 2/27/20 8:52 PM, William Tu wrote: > >>>> On Thu, Feb 27, 2020 at 11:10 AM William Tu <u9012063@gmail.com> wrote: > >>>>> > >>>>> On Thu, Feb 27, 2020 at 9:54 AM Ilya Maximets <i.maximets@ovn.org> wrote: > >>>>>> > >>>>>> On 2/27/20 6:51 PM, William Tu wrote: > >>>>>>> On Thu, Feb 27, 2020 at 9:42 AM Ilya Maximets <i.maximets@ovn.org> wrote: > >>>>>>>> > >>>>>>>> On 2/27/20 6:30 PM, William Tu wrote: > >>>>>>>>> The patch adds a new netdev class 'afxdp-nonpmd' to enable afxdp > >>>>>>>>> interrupt mode. This is similar to 'type=afxdp', except that the > >>>>>>>>> is_pmd field is set to false. As a result, the packet processing > >>>>>>>>> is handled by main thread, not pmd thread. This avoids burning > >>>>>>>>> the CPU to always 100% when there is no traffic. > >>>>>>>>> > >>>>>>>>> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/655885506 > >>>>>>>>> Signed-off-by: William Tu <u9012063@gmail.com> > >>>>>>>>> --- > >>>>>>>>> NEWS | 3 +++ > >>>>>>>>> lib/netdev-afxdp.c | 20 +++++++++++++++++++- > >>>>>>>>> lib/netdev-linux.c | 20 ++++++++++++++++++++ > >>>>>>>>> lib/netdev-provider.h | 1 + > >>>>>>>>> lib/netdev.c | 1 + > >>>>>>>>> tests/system-afxdp.at | 23 +++++++++++++++++++++++ > >>>>>>>>> 6 files changed, 67 insertions(+), 1 deletion(-) > >>>>>>>>> > >>>>>>>>> diff --git a/NEWS b/NEWS > >>>>>>>>> index f62ef1f47ea8..594c55dc11d6 100644 > >>>>>>>>> --- a/NEWS > >>>>>>>>> +++ b/NEWS > >>>>>>>>> @@ -4,6 +4,9 @@ Post-v2.13.0 > >>>>>>>>> - OpenFlow: > >>>>>>>>> * The OpenFlow ofp_desc/serial_num may now be configured by setting the > >>>>>>>>> value of other-config:dp-sn in the Bridge table. > >>>>>>>>> + - AF_XDP: > >>>>>>>>> + * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles > >>>>>>>>> + by enabling interrupt mode. > >>>>>>>>> > >>>>>>>>> > >>>>>>>>> v2.13.0 - 14 Feb 2020 > >>>>>>>>> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c > >>>>>>>>> index 482400d8d135..cd2c7c381139 100644 > >>>>>>>>> --- a/lib/netdev-afxdp.c > >>>>>>>>> +++ b/lib/netdev-afxdp.c > >>>>>>>>> @@ -169,6 +169,12 @@ struct netdev_afxdp_tx_lock { > >>>>>>>>> ); > >>>>>>>>> }; > >>>>>>>>> > >>>>>>>>> +static int nonpmd_cnt; /* Number of afxdp netdevs in non-pmd mode. */ > >>>>>>>>> +static bool > >>>>>>>>> +netdev_is_afxdp_nonpmd(struct netdev *netdev) { > >>>>>>>>> + return netdev_get_class(netdev) == &netdev_afxdp_nonpmd_class; > >>>>>>>>> +} > >>>>>>>>> + > >>>>>>>>> #ifdef HAVE_XDP_NEED_WAKEUP > >>>>>>>>> static inline void > >>>>>>>>> xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem, > >>>>>>>>> @@ -1115,7 +1121,10 @@ netdev_afxdp_batch_send(struct netdev *netdev, int qid, > >>>>>>>>> struct netdev_linux *dev; > >>>>>>>>> int ret; > >>>>>>>>> > >>>>>>>>> - if (concurrent_txq) { > >>>>>>>>> + /* Lock is required when mixing afxdp pmd and nonpmd mode. > >>>>>>>>> + * ex: one device is created 'afxdp', the other is 'afxdp-nonpmd'. > >>>>>>>>> + */ > >>>>>>>>> + if (concurrent_txq || (nonpmd_cnt != 0)) { > >>>>>>>> > >>>>>>>> I'm confused about this change. Are you expecting same device being > >>>>>>>> opened twice with different netdev type? Database should not allow this. > >>>>>>> > >>>>>>> Not the same device. > >>>>>>> > >>>>>>> For example, dev1 opened as 'afxdp', and dev2 opened as 'afxdp-nonpmd'. > >>>>>>> When dev2 receives a packet and send to dev1, the main thread used by dev2 > >>>>>>> is calling the send(), while it is possible that dev2's pmd thread is > >>>>>>> also calling > >>>>>>> send(). So need a lock here. > >>>>>> > >>>>>> But they will send to different tx queues. > >>>>> > >>>>> OK, I think you are talking about the XPS feature (dynamic txqs). > >>>>> I thought XPS can only work when all between pmd threads. > >>>>> So adding a lock here. > >>>>> The patch currently doesn't work without the lock. Let me investigate more. > >>>>> > >>>> More details. > >>>> On my test using veth (only have 1 rxq 1 txq) > >>>> Somehow the main thread is assigned to use queue id = 2, which causes segfault. > >>> > >>> That is very strange... > >>> Could you, please, share your test setup? I'll try to reproduce. > >>> > >>> Best regards, Ilya Maximets. > >> > >> Sorry, somehow I miss your reply in my mailbox. > >> I simply tested it using two namespaces, one device as afxdp, the > >> other as afxdp-nonpmd. > >> --- > >> # start ovs-vswitchd and ovsdb-server > >> > >> ovs-vsctl -- add-br br0 -- set Bridge br0 datapath_type=netdev > >> ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0xf > >> > >> ip netns add at_ns0 > >> ip link add p0 type veth peer name afxdp-p0 > >> ip link set p0 netns at_ns0 > >> ip link set dev afxdp-p0 up > >> ovs-vsctl add-port br0 afxdp-p0 > >> ovs-vsctl -- set interface afxdp-p0 options:n_rxq=1 > >> type="afxdp-nonpmd" options:xdp-mode=generic > >> > >> ip netns exec at_ns0 sh << NS_EXEC_HEREDOC > >> ip addr add "10.1.1.1/24" dev p0 > >> ip link set dev p0 up > >> NS_EXEC_HEREDOC > >> > >> ip netns add at_ns1 > >> ip link add p1 type veth peer name afxdp-p1 > >> ip link set p1 netns at_ns1 > >> ip link set dev afxdp-p1 up > >> ovs-vsctl add-port br0 afxdp-p1 -- \ > >> set interface afxdp-p1 type="afxdp" > >> options:xdp-mode=generic options:n_rxq=1 > >> > >> ip netns exec at_ns1 sh << NS_EXEC_HEREDOC > >> ip addr add "10.1.1.2/24" dev p1 > >> ip link set dev p1 up > >> NS_EXEC_HEREDOC > >> > >> ip netns exec at_ns0 ping -c 10 -i .2 10.1.1.2 > >> --- > >> > >> It's pretty easy to reproduce the issue, ping will crash the ovs-vswitchd. > > > > Thanks. I'll try to reproduce on next week. > > It took a bit longer, but I reproduced the issue and sent a fix here: > https://patchwork.ozlabs.org/patch/1261072/ > > Best regards, Ilya Maximets. Thanks a lot! William
diff --git a/NEWS b/NEWS index f62ef1f47ea8..594c55dc11d6 100644 --- a/NEWS +++ b/NEWS @@ -4,6 +4,9 @@ Post-v2.13.0 - OpenFlow: * The OpenFlow ofp_desc/serial_num may now be configured by setting the value of other-config:dp-sn in the Bridge table. + - AF_XDP: + * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles + by enabling interrupt mode. v2.13.0 - 14 Feb 2020 diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c index 482400d8d135..cd2c7c381139 100644 --- a/lib/netdev-afxdp.c +++ b/lib/netdev-afxdp.c @@ -169,6 +169,12 @@ struct netdev_afxdp_tx_lock { ); }; +static int nonpmd_cnt; /* Number of afxdp netdevs in non-pmd mode. */ +static bool +netdev_is_afxdp_nonpmd(struct netdev *netdev) { + return netdev_get_class(netdev) == &netdev_afxdp_nonpmd_class; +} + #ifdef HAVE_XDP_NEED_WAKEUP static inline void xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem, @@ -1115,7 +1121,10 @@ netdev_afxdp_batch_send(struct netdev *netdev, int qid, struct netdev_linux *dev; int ret; - if (concurrent_txq) { + /* Lock is required when mixing afxdp pmd and nonpmd mode. + * ex: one device is created 'afxdp', the other is 'afxdp-nonpmd'. + */ + if (concurrent_txq || (nonpmd_cnt != 0)) { dev = netdev_linux_cast(netdev); qid = qid % netdev_n_txq(netdev); @@ -1159,6 +1168,7 @@ libbpf_print(enum libbpf_print_level level, int netdev_afxdp_init(void) { libbpf_set_print(libbpf_print); + nonpmd_cnt = 0; return 0; } @@ -1188,6 +1198,10 @@ netdev_afxdp_construct(struct netdev *netdev) dev->tx_locks = NULL; netdev_request_reconfigure(netdev); + + if (netdev_is_afxdp_nonpmd(netdev)) { + nonpmd_cnt++; + } return 0; } @@ -1208,6 +1222,10 @@ netdev_afxdp_destruct(struct netdev *netdev) xsk_destroy_all(netdev); ovs_mutex_destroy(&dev->mutex); + + if (netdev_is_afxdp_nonpmd(netdev)) { + nonpmd_cnt--; + } } int diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index c6f3d27409b6..75b3b55d3cad 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -3605,6 +3605,26 @@ const struct netdev_class netdev_afxdp_class = { .rxq_destruct = netdev_afxdp_rxq_destruct, .rxq_recv = netdev_afxdp_rxq_recv, }; + +const struct netdev_class netdev_afxdp_nonpmd_class = { + NETDEV_LINUX_CLASS_COMMON, + .type = "afxdp-nonpmd", + .is_pmd = false, + .init = netdev_afxdp_init, + .construct = netdev_afxdp_construct, + .destruct = netdev_afxdp_destruct, + .get_stats = netdev_afxdp_get_stats, + .get_custom_stats = netdev_afxdp_get_custom_stats, + .get_status = netdev_linux_get_status, + .set_config = netdev_afxdp_set_config, + .get_config = netdev_afxdp_get_config, + .reconfigure = netdev_afxdp_reconfigure, + .get_numa_id = netdev_linux_get_numa_id, + .send = netdev_afxdp_batch_send, + .rxq_construct = netdev_afxdp_rxq_construct, + .rxq_destruct = netdev_afxdp_rxq_destruct, + .rxq_recv = netdev_afxdp_rxq_recv, +}; #endif diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index 22f4cde3337a..06c1d98a0b07 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -848,6 +848,7 @@ extern const struct netdev_class netdev_tap_class; #ifdef HAVE_AF_XDP extern const struct netdev_class netdev_afxdp_class; +extern const struct netdev_class netdev_afxdp_nonpmd_class; #endif #ifdef __cplusplus } diff --git a/lib/netdev.c b/lib/netdev.c index f95b19af4da0..6d9723ebc3c1 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -154,6 +154,7 @@ netdev_initialize(void) netdev_register_flow_api_provider(&netdev_offload_tc); #ifdef HAVE_AF_XDP netdev_register_provider(&netdev_afxdp_class); + netdev_register_provider(&netdev_afxdp_nonpmd_class); #endif #endif #if defined(__FreeBSD__) || defined(__NetBSD__) diff --git a/tests/system-afxdp.at b/tests/system-afxdp.at index e4451624f882..0d09906fb6c8 100644 --- a/tests/system-afxdp.at +++ b/tests/system-afxdp.at @@ -22,3 +22,26 @@ AT_CHECK([grep "ovs-p0: could not set configuration" ovs-vswitchd.log | wc -l], OVS_TRAFFIC_VSWITCHD_STOP(["/ovs-p0: Too big 'n_rxq'/d /ovs-p0: could not set configuration/d"]) AT_CLEANUP + + +AT_SETUP([AF_XDP - ping between pmd and non-pmd ports]) +AT_KEYWORDS([afxdp nonpmd]) +OVS_TRAFFIC_VSWITCHD_START() + +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"]) + +ADD_NAMESPACES(at_ns0, at_ns1) +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24") + +AT_CHECK([ovs-vsctl del-port ovs-p0]) +AT_CHECK([ovs-vsctl add-port br0 ovs-p0 -- \ + set interface ovs-p0 type=afxdp-nonpmd options:n_rxq=1], + [0], [], [stderr]) + +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP
The patch adds a new netdev class 'afxdp-nonpmd' to enable afxdp interrupt mode. This is similar to 'type=afxdp', except that the is_pmd field is set to false. As a result, the packet processing is handled by main thread, not pmd thread. This avoids burning the CPU to always 100% when there is no traffic. Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/655885506 Signed-off-by: William Tu <u9012063@gmail.com> --- NEWS | 3 +++ lib/netdev-afxdp.c | 20 +++++++++++++++++++- lib/netdev-linux.c | 20 ++++++++++++++++++++ lib/netdev-provider.h | 1 + lib/netdev.c | 1 + tests/system-afxdp.at | 23 +++++++++++++++++++++++ 6 files changed, 67 insertions(+), 1 deletion(-)