diff mbox series

[ovs-dev,PATCHv2] netdev-afxdp: Add interrupt mode netdev class.

Message ID 1582824604-113843-1-git-send-email-u9012063@gmail.com
State Changes Requested
Headers show
Series [ovs-dev,PATCHv2] netdev-afxdp: Add interrupt mode netdev class. | expand

Commit Message

William Tu Feb. 27, 2020, 5:30 p.m. UTC
The patch adds a new netdev class 'afxdp-nonpmd' to enable afxdp
interrupt mode. This is similar to 'type=afxdp', except that the
is_pmd field is set to false. As a result, the packet processing
is handled by main thread, not pmd thread. This avoids burning
the CPU to always 100% when there is no traffic.

Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/655885506
Signed-off-by: William Tu <u9012063@gmail.com>
---
 NEWS                  |  3 +++
 lib/netdev-afxdp.c    | 20 +++++++++++++++++++-
 lib/netdev-linux.c    | 20 ++++++++++++++++++++
 lib/netdev-provider.h |  1 +
 lib/netdev.c          |  1 +
 tests/system-afxdp.at | 23 +++++++++++++++++++++++
 6 files changed, 67 insertions(+), 1 deletion(-)

Comments

Ilya Maximets Feb. 27, 2020, 5:42 p.m. UTC | #1
On 2/27/20 6:30 PM, William Tu wrote:
> The patch adds a new netdev class 'afxdp-nonpmd' to enable afxdp
> interrupt mode. This is similar to 'type=afxdp', except that the
> is_pmd field is set to false. As a result, the packet processing
> is handled by main thread, not pmd thread. This avoids burning
> the CPU to always 100% when there is no traffic.
> 
> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/655885506
> Signed-off-by: William Tu <u9012063@gmail.com>
> ---
>  NEWS                  |  3 +++
>  lib/netdev-afxdp.c    | 20 +++++++++++++++++++-
>  lib/netdev-linux.c    | 20 ++++++++++++++++++++
>  lib/netdev-provider.h |  1 +
>  lib/netdev.c          |  1 +
>  tests/system-afxdp.at | 23 +++++++++++++++++++++++
>  6 files changed, 67 insertions(+), 1 deletion(-)
> 
> diff --git a/NEWS b/NEWS
> index f62ef1f47ea8..594c55dc11d6 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -4,6 +4,9 @@ Post-v2.13.0
>     - OpenFlow:
>       * The OpenFlow ofp_desc/serial_num may now be configured by setting the
>         value of other-config:dp-sn in the Bridge table.
> +   - AF_XDP:
> +     * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles
> +       by enabling interrupt mode.
>  
>  
>  v2.13.0 - 14 Feb 2020
> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
> index 482400d8d135..cd2c7c381139 100644
> --- a/lib/netdev-afxdp.c
> +++ b/lib/netdev-afxdp.c
> @@ -169,6 +169,12 @@ struct netdev_afxdp_tx_lock {
>      );
>  };
>  
> +static int nonpmd_cnt;  /* Number of afxdp netdevs in non-pmd mode. */
> +static bool
> +netdev_is_afxdp_nonpmd(struct netdev *netdev) {
> +    return netdev_get_class(netdev) == &netdev_afxdp_nonpmd_class;
> +}
> +
>  #ifdef HAVE_XDP_NEED_WAKEUP
>  static inline void
>  xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem,
> @@ -1115,7 +1121,10 @@ netdev_afxdp_batch_send(struct netdev *netdev, int qid,
>      struct netdev_linux *dev;
>      int ret;
>  
> -    if (concurrent_txq) {
> +    /* Lock is required when mixing afxdp pmd and nonpmd mode.
> +     * ex: one device is created 'afxdp', the other is 'afxdp-nonpmd'.
> +     */
> +    if (concurrent_txq || (nonpmd_cnt != 0)) {

I'm confused about this change.  Are you expecting same device being
opened twice with different netdev type?  Database should not allow this.

>          dev = netdev_linux_cast(netdev);
>          qid = qid % netdev_n_txq(netdev);
>  
> @@ -1159,6 +1168,7 @@ libbpf_print(enum libbpf_print_level level,
>  int netdev_afxdp_init(void)
>  {
>      libbpf_set_print(libbpf_print);
> +    nonpmd_cnt = 0;
>      return 0;
>  }
>  
> @@ -1188,6 +1198,10 @@ netdev_afxdp_construct(struct netdev *netdev)
>      dev->tx_locks = NULL;
>  
>      netdev_request_reconfigure(netdev);
> +
> +    if (netdev_is_afxdp_nonpmd(netdev)) {
> +        nonpmd_cnt++;
> +    }
>      return 0;
>  }
>  
> @@ -1208,6 +1222,10 @@ netdev_afxdp_destruct(struct netdev *netdev)
>  
>      xsk_destroy_all(netdev);
>      ovs_mutex_destroy(&dev->mutex);
> +
> +    if (netdev_is_afxdp_nonpmd(netdev)) {
> +        nonpmd_cnt--;
> +    }
>  }
>  
>  int
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index c6f3d27409b6..75b3b55d3cad 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -3605,6 +3605,26 @@ const struct netdev_class netdev_afxdp_class = {
>      .rxq_destruct = netdev_afxdp_rxq_destruct,
>      .rxq_recv = netdev_afxdp_rxq_recv,
>  };
> +
> +const struct netdev_class netdev_afxdp_nonpmd_class = {
> +    NETDEV_LINUX_CLASS_COMMON,
> +    .type = "afxdp-nonpmd",
> +    .is_pmd = false,
> +    .init = netdev_afxdp_init,
> +    .construct = netdev_afxdp_construct,
> +    .destruct = netdev_afxdp_destruct,
> +    .get_stats = netdev_afxdp_get_stats,
> +    .get_custom_stats = netdev_afxdp_get_custom_stats,
> +    .get_status = netdev_linux_get_status,
> +    .set_config = netdev_afxdp_set_config,
> +    .get_config = netdev_afxdp_get_config,
> +    .reconfigure = netdev_afxdp_reconfigure,
> +    .get_numa_id = netdev_linux_get_numa_id,
> +    .send = netdev_afxdp_batch_send,
> +    .rxq_construct = netdev_afxdp_rxq_construct,
> +    .rxq_destruct = netdev_afxdp_rxq_destruct,
> +    .rxq_recv = netdev_afxdp_rxq_recv,
> +};
>  #endif
>  
>  
> diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
> index 22f4cde3337a..06c1d98a0b07 100644
> --- a/lib/netdev-provider.h
> +++ b/lib/netdev-provider.h
> @@ -848,6 +848,7 @@ extern const struct netdev_class netdev_tap_class;
>  
>  #ifdef HAVE_AF_XDP
>  extern const struct netdev_class netdev_afxdp_class;
> +extern const struct netdev_class netdev_afxdp_nonpmd_class;
>  #endif
>  #ifdef  __cplusplus
>  }
> diff --git a/lib/netdev.c b/lib/netdev.c
> index f95b19af4da0..6d9723ebc3c1 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c
> @@ -154,6 +154,7 @@ netdev_initialize(void)
>          netdev_register_flow_api_provider(&netdev_offload_tc);
>  #ifdef HAVE_AF_XDP
>          netdev_register_provider(&netdev_afxdp_class);
> +        netdev_register_provider(&netdev_afxdp_nonpmd_class);
>  #endif
>  #endif
>  #if defined(__FreeBSD__) || defined(__NetBSD__)
> diff --git a/tests/system-afxdp.at b/tests/system-afxdp.at
> index e4451624f882..0d09906fb6c8 100644
> --- a/tests/system-afxdp.at
> +++ b/tests/system-afxdp.at
> @@ -22,3 +22,26 @@ AT_CHECK([grep "ovs-p0: could not set configuration" ovs-vswitchd.log | wc -l],
>  OVS_TRAFFIC_VSWITCHD_STOP(["/ovs-p0: Too big 'n_rxq'/d
>  /ovs-p0: could not set configuration/d"])
>  AT_CLEANUP
> +
> +
> +AT_SETUP([AF_XDP - ping between pmd and non-pmd ports])
> +AT_KEYWORDS([afxdp nonpmd])
> +OVS_TRAFFIC_VSWITCHD_START()
> +
> +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
> +
> +ADD_NAMESPACES(at_ns0, at_ns1)
> +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
> +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
> +
> +AT_CHECK([ovs-vsctl del-port ovs-p0])
> +AT_CHECK([ovs-vsctl add-port br0 ovs-p0 -- \
> +                    set interface ovs-p0 type=afxdp-nonpmd options:n_rxq=1],
> +         [0], [], [stderr])
> +
> +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +OVS_TRAFFIC_VSWITCHD_STOP
> +AT_CLEANUP
>
William Tu Feb. 27, 2020, 5:51 p.m. UTC | #2
On Thu, Feb 27, 2020 at 9:42 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> On 2/27/20 6:30 PM, William Tu wrote:
> > The patch adds a new netdev class 'afxdp-nonpmd' to enable afxdp
> > interrupt mode. This is similar to 'type=afxdp', except that the
> > is_pmd field is set to false. As a result, the packet processing
> > is handled by main thread, not pmd thread. This avoids burning
> > the CPU to always 100% when there is no traffic.
> >
> > Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/655885506
> > Signed-off-by: William Tu <u9012063@gmail.com>
> > ---
> >  NEWS                  |  3 +++
> >  lib/netdev-afxdp.c    | 20 +++++++++++++++++++-
> >  lib/netdev-linux.c    | 20 ++++++++++++++++++++
> >  lib/netdev-provider.h |  1 +
> >  lib/netdev.c          |  1 +
> >  tests/system-afxdp.at | 23 +++++++++++++++++++++++
> >  6 files changed, 67 insertions(+), 1 deletion(-)
> >
> > diff --git a/NEWS b/NEWS
> > index f62ef1f47ea8..594c55dc11d6 100644
> > --- a/NEWS
> > +++ b/NEWS
> > @@ -4,6 +4,9 @@ Post-v2.13.0
> >     - OpenFlow:
> >       * The OpenFlow ofp_desc/serial_num may now be configured by setting the
> >         value of other-config:dp-sn in the Bridge table.
> > +   - AF_XDP:
> > +     * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles
> > +       by enabling interrupt mode.
> >
> >
> >  v2.13.0 - 14 Feb 2020
> > diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
> > index 482400d8d135..cd2c7c381139 100644
> > --- a/lib/netdev-afxdp.c
> > +++ b/lib/netdev-afxdp.c
> > @@ -169,6 +169,12 @@ struct netdev_afxdp_tx_lock {
> >      );
> >  };
> >
> > +static int nonpmd_cnt;  /* Number of afxdp netdevs in non-pmd mode. */
> > +static bool
> > +netdev_is_afxdp_nonpmd(struct netdev *netdev) {
> > +    return netdev_get_class(netdev) == &netdev_afxdp_nonpmd_class;
> > +}
> > +
> >  #ifdef HAVE_XDP_NEED_WAKEUP
> >  static inline void
> >  xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem,
> > @@ -1115,7 +1121,10 @@ netdev_afxdp_batch_send(struct netdev *netdev, int qid,
> >      struct netdev_linux *dev;
> >      int ret;
> >
> > -    if (concurrent_txq) {
> > +    /* Lock is required when mixing afxdp pmd and nonpmd mode.
> > +     * ex: one device is created 'afxdp', the other is 'afxdp-nonpmd'.
> > +     */
> > +    if (concurrent_txq || (nonpmd_cnt != 0)) {
>
> I'm confused about this change.  Are you expecting same device being
> opened twice with different netdev type?  Database should not allow this.

Not the same device.

For example, dev1 opened as 'afxdp', and dev2 opened as 'afxdp-nonpmd'.
When dev2 receives a packet and send to dev1, the main thread used by dev2
is calling the send(), while it is possible that dev2's pmd thread is
also calling
send(). So need a lock here.

William
Ilya Maximets Feb. 27, 2020, 5:54 p.m. UTC | #3
On 2/27/20 6:51 PM, William Tu wrote:
> On Thu, Feb 27, 2020 at 9:42 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>>
>> On 2/27/20 6:30 PM, William Tu wrote:
>>> The patch adds a new netdev class 'afxdp-nonpmd' to enable afxdp
>>> interrupt mode. This is similar to 'type=afxdp', except that the
>>> is_pmd field is set to false. As a result, the packet processing
>>> is handled by main thread, not pmd thread. This avoids burning
>>> the CPU to always 100% when there is no traffic.
>>>
>>> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/655885506
>>> Signed-off-by: William Tu <u9012063@gmail.com>
>>> ---
>>>  NEWS                  |  3 +++
>>>  lib/netdev-afxdp.c    | 20 +++++++++++++++++++-
>>>  lib/netdev-linux.c    | 20 ++++++++++++++++++++
>>>  lib/netdev-provider.h |  1 +
>>>  lib/netdev.c          |  1 +
>>>  tests/system-afxdp.at | 23 +++++++++++++++++++++++
>>>  6 files changed, 67 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/NEWS b/NEWS
>>> index f62ef1f47ea8..594c55dc11d6 100644
>>> --- a/NEWS
>>> +++ b/NEWS
>>> @@ -4,6 +4,9 @@ Post-v2.13.0
>>>     - OpenFlow:
>>>       * The OpenFlow ofp_desc/serial_num may now be configured by setting the
>>>         value of other-config:dp-sn in the Bridge table.
>>> +   - AF_XDP:
>>> +     * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles
>>> +       by enabling interrupt mode.
>>>
>>>
>>>  v2.13.0 - 14 Feb 2020
>>> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
>>> index 482400d8d135..cd2c7c381139 100644
>>> --- a/lib/netdev-afxdp.c
>>> +++ b/lib/netdev-afxdp.c
>>> @@ -169,6 +169,12 @@ struct netdev_afxdp_tx_lock {
>>>      );
>>>  };
>>>
>>> +static int nonpmd_cnt;  /* Number of afxdp netdevs in non-pmd mode. */
>>> +static bool
>>> +netdev_is_afxdp_nonpmd(struct netdev *netdev) {
>>> +    return netdev_get_class(netdev) == &netdev_afxdp_nonpmd_class;
>>> +}
>>> +
>>>  #ifdef HAVE_XDP_NEED_WAKEUP
>>>  static inline void
>>>  xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem,
>>> @@ -1115,7 +1121,10 @@ netdev_afxdp_batch_send(struct netdev *netdev, int qid,
>>>      struct netdev_linux *dev;
>>>      int ret;
>>>
>>> -    if (concurrent_txq) {
>>> +    /* Lock is required when mixing afxdp pmd and nonpmd mode.
>>> +     * ex: one device is created 'afxdp', the other is 'afxdp-nonpmd'.
>>> +     */
>>> +    if (concurrent_txq || (nonpmd_cnt != 0)) {
>>
>> I'm confused about this change.  Are you expecting same device being
>> opened twice with different netdev type?  Database should not allow this.
> 
> Not the same device.
> 
> For example, dev1 opened as 'afxdp', and dev2 opened as 'afxdp-nonpmd'.
> When dev2 receives a packet and send to dev1, the main thread used by dev2
> is calling the send(), while it is possible that dev2's pmd thread is
> also calling
> send(). So need a lock here.

But they will send to different tx queues.
William Tu Feb. 27, 2020, 7:10 p.m. UTC | #4
On Thu, Feb 27, 2020 at 9:54 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> On 2/27/20 6:51 PM, William Tu wrote:
> > On Thu, Feb 27, 2020 at 9:42 AM Ilya Maximets <i.maximets@ovn.org> wrote:
> >>
> >> On 2/27/20 6:30 PM, William Tu wrote:
> >>> The patch adds a new netdev class 'afxdp-nonpmd' to enable afxdp
> >>> interrupt mode. This is similar to 'type=afxdp', except that the
> >>> is_pmd field is set to false. As a result, the packet processing
> >>> is handled by main thread, not pmd thread. This avoids burning
> >>> the CPU to always 100% when there is no traffic.
> >>>
> >>> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/655885506
> >>> Signed-off-by: William Tu <u9012063@gmail.com>
> >>> ---
> >>>  NEWS                  |  3 +++
> >>>  lib/netdev-afxdp.c    | 20 +++++++++++++++++++-
> >>>  lib/netdev-linux.c    | 20 ++++++++++++++++++++
> >>>  lib/netdev-provider.h |  1 +
> >>>  lib/netdev.c          |  1 +
> >>>  tests/system-afxdp.at | 23 +++++++++++++++++++++++
> >>>  6 files changed, 67 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/NEWS b/NEWS
> >>> index f62ef1f47ea8..594c55dc11d6 100644
> >>> --- a/NEWS
> >>> +++ b/NEWS
> >>> @@ -4,6 +4,9 @@ Post-v2.13.0
> >>>     - OpenFlow:
> >>>       * The OpenFlow ofp_desc/serial_num may now be configured by setting the
> >>>         value of other-config:dp-sn in the Bridge table.
> >>> +   - AF_XDP:
> >>> +     * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles
> >>> +       by enabling interrupt mode.
> >>>
> >>>
> >>>  v2.13.0 - 14 Feb 2020
> >>> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
> >>> index 482400d8d135..cd2c7c381139 100644
> >>> --- a/lib/netdev-afxdp.c
> >>> +++ b/lib/netdev-afxdp.c
> >>> @@ -169,6 +169,12 @@ struct netdev_afxdp_tx_lock {
> >>>      );
> >>>  };
> >>>
> >>> +static int nonpmd_cnt;  /* Number of afxdp netdevs in non-pmd mode. */
> >>> +static bool
> >>> +netdev_is_afxdp_nonpmd(struct netdev *netdev) {
> >>> +    return netdev_get_class(netdev) == &netdev_afxdp_nonpmd_class;
> >>> +}
> >>> +
> >>>  #ifdef HAVE_XDP_NEED_WAKEUP
> >>>  static inline void
> >>>  xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem,
> >>> @@ -1115,7 +1121,10 @@ netdev_afxdp_batch_send(struct netdev *netdev, int qid,
> >>>      struct netdev_linux *dev;
> >>>      int ret;
> >>>
> >>> -    if (concurrent_txq) {
> >>> +    /* Lock is required when mixing afxdp pmd and nonpmd mode.
> >>> +     * ex: one device is created 'afxdp', the other is 'afxdp-nonpmd'.
> >>> +     */
> >>> +    if (concurrent_txq || (nonpmd_cnt != 0)) {
> >>
> >> I'm confused about this change.  Are you expecting same device being
> >> opened twice with different netdev type?  Database should not allow this.
> >
> > Not the same device.
> >
> > For example, dev1 opened as 'afxdp', and dev2 opened as 'afxdp-nonpmd'.
> > When dev2 receives a packet and send to dev1, the main thread used by dev2
> > is calling the send(), while it is possible that dev2's pmd thread is
> > also calling
> > send(). So need a lock here.
>
> But they will send to different tx queues.

OK,  I think you are talking about the XPS feature (dynamic txqs).
I thought XPS can only work when all between pmd threads.
So adding a lock here.
The patch currently doesn't work without the lock. Let me investigate more.

Thanks
William
William Tu Feb. 27, 2020, 7:52 p.m. UTC | #5
On Thu, Feb 27, 2020 at 11:10 AM William Tu <u9012063@gmail.com> wrote:
>
> On Thu, Feb 27, 2020 at 9:54 AM Ilya Maximets <i.maximets@ovn.org> wrote:
> >
> > On 2/27/20 6:51 PM, William Tu wrote:
> > > On Thu, Feb 27, 2020 at 9:42 AM Ilya Maximets <i.maximets@ovn.org> wrote:
> > >>
> > >> On 2/27/20 6:30 PM, William Tu wrote:
> > >>> The patch adds a new netdev class 'afxdp-nonpmd' to enable afxdp
> > >>> interrupt mode. This is similar to 'type=afxdp', except that the
> > >>> is_pmd field is set to false. As a result, the packet processing
> > >>> is handled by main thread, not pmd thread. This avoids burning
> > >>> the CPU to always 100% when there is no traffic.
> > >>>
> > >>> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/655885506
> > >>> Signed-off-by: William Tu <u9012063@gmail.com>
> > >>> ---
> > >>>  NEWS                  |  3 +++
> > >>>  lib/netdev-afxdp.c    | 20 +++++++++++++++++++-
> > >>>  lib/netdev-linux.c    | 20 ++++++++++++++++++++
> > >>>  lib/netdev-provider.h |  1 +
> > >>>  lib/netdev.c          |  1 +
> > >>>  tests/system-afxdp.at | 23 +++++++++++++++++++++++
> > >>>  6 files changed, 67 insertions(+), 1 deletion(-)
> > >>>
> > >>> diff --git a/NEWS b/NEWS
> > >>> index f62ef1f47ea8..594c55dc11d6 100644
> > >>> --- a/NEWS
> > >>> +++ b/NEWS
> > >>> @@ -4,6 +4,9 @@ Post-v2.13.0
> > >>>     - OpenFlow:
> > >>>       * The OpenFlow ofp_desc/serial_num may now be configured by setting the
> > >>>         value of other-config:dp-sn in the Bridge table.
> > >>> +   - AF_XDP:
> > >>> +     * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles
> > >>> +       by enabling interrupt mode.
> > >>>
> > >>>
> > >>>  v2.13.0 - 14 Feb 2020
> > >>> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
> > >>> index 482400d8d135..cd2c7c381139 100644
> > >>> --- a/lib/netdev-afxdp.c
> > >>> +++ b/lib/netdev-afxdp.c
> > >>> @@ -169,6 +169,12 @@ struct netdev_afxdp_tx_lock {
> > >>>      );
> > >>>  };
> > >>>
> > >>> +static int nonpmd_cnt;  /* Number of afxdp netdevs in non-pmd mode. */
> > >>> +static bool
> > >>> +netdev_is_afxdp_nonpmd(struct netdev *netdev) {
> > >>> +    return netdev_get_class(netdev) == &netdev_afxdp_nonpmd_class;
> > >>> +}
> > >>> +
> > >>>  #ifdef HAVE_XDP_NEED_WAKEUP
> > >>>  static inline void
> > >>>  xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem,
> > >>> @@ -1115,7 +1121,10 @@ netdev_afxdp_batch_send(struct netdev *netdev, int qid,
> > >>>      struct netdev_linux *dev;
> > >>>      int ret;
> > >>>
> > >>> -    if (concurrent_txq) {
> > >>> +    /* Lock is required when mixing afxdp pmd and nonpmd mode.
> > >>> +     * ex: one device is created 'afxdp', the other is 'afxdp-nonpmd'.
> > >>> +     */
> > >>> +    if (concurrent_txq || (nonpmd_cnt != 0)) {
> > >>
> > >> I'm confused about this change.  Are you expecting same device being
> > >> opened twice with different netdev type?  Database should not allow this.
> > >
> > > Not the same device.
> > >
> > > For example, dev1 opened as 'afxdp', and dev2 opened as 'afxdp-nonpmd'.
> > > When dev2 receives a packet and send to dev1, the main thread used by dev2
> > > is calling the send(), while it is possible that dev2's pmd thread is
> > > also calling
> > > send(). So need a lock here.
> >
> > But they will send to different tx queues.
>
> OK,  I think you are talking about the XPS feature (dynamic txqs).
> I thought XPS can only work when all between pmd threads.
> So adding a lock here.
> The patch currently doesn't work without the lock. Let me investigate more.
>
More details.
On my test using veth (only have 1 rxq 1 txq)
Somehow the main thread is assigned to use queue id  = 2, which causes segfault.
I will send v3 patch.
Thanks
William
Ilya Maximets Feb. 28, 2020, 8:11 a.m. UTC | #6
On 2/27/20 8:52 PM, William Tu wrote:
> On Thu, Feb 27, 2020 at 11:10 AM William Tu <u9012063@gmail.com> wrote:
>>
>> On Thu, Feb 27, 2020 at 9:54 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>>>
>>> On 2/27/20 6:51 PM, William Tu wrote:
>>>> On Thu, Feb 27, 2020 at 9:42 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>>>>>
>>>>> On 2/27/20 6:30 PM, William Tu wrote:
>>>>>> The patch adds a new netdev class 'afxdp-nonpmd' to enable afxdp
>>>>>> interrupt mode. This is similar to 'type=afxdp', except that the
>>>>>> is_pmd field is set to false. As a result, the packet processing
>>>>>> is handled by main thread, not pmd thread. This avoids burning
>>>>>> the CPU to always 100% when there is no traffic.
>>>>>>
>>>>>> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/655885506
>>>>>> Signed-off-by: William Tu <u9012063@gmail.com>
>>>>>> ---
>>>>>>  NEWS                  |  3 +++
>>>>>>  lib/netdev-afxdp.c    | 20 +++++++++++++++++++-
>>>>>>  lib/netdev-linux.c    | 20 ++++++++++++++++++++
>>>>>>  lib/netdev-provider.h |  1 +
>>>>>>  lib/netdev.c          |  1 +
>>>>>>  tests/system-afxdp.at | 23 +++++++++++++++++++++++
>>>>>>  6 files changed, 67 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/NEWS b/NEWS
>>>>>> index f62ef1f47ea8..594c55dc11d6 100644
>>>>>> --- a/NEWS
>>>>>> +++ b/NEWS
>>>>>> @@ -4,6 +4,9 @@ Post-v2.13.0
>>>>>>     - OpenFlow:
>>>>>>       * The OpenFlow ofp_desc/serial_num may now be configured by setting the
>>>>>>         value of other-config:dp-sn in the Bridge table.
>>>>>> +   - AF_XDP:
>>>>>> +     * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles
>>>>>> +       by enabling interrupt mode.
>>>>>>
>>>>>>
>>>>>>  v2.13.0 - 14 Feb 2020
>>>>>> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
>>>>>> index 482400d8d135..cd2c7c381139 100644
>>>>>> --- a/lib/netdev-afxdp.c
>>>>>> +++ b/lib/netdev-afxdp.c
>>>>>> @@ -169,6 +169,12 @@ struct netdev_afxdp_tx_lock {
>>>>>>      );
>>>>>>  };
>>>>>>
>>>>>> +static int nonpmd_cnt;  /* Number of afxdp netdevs in non-pmd mode. */
>>>>>> +static bool
>>>>>> +netdev_is_afxdp_nonpmd(struct netdev *netdev) {
>>>>>> +    return netdev_get_class(netdev) == &netdev_afxdp_nonpmd_class;
>>>>>> +}
>>>>>> +
>>>>>>  #ifdef HAVE_XDP_NEED_WAKEUP
>>>>>>  static inline void
>>>>>>  xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem,
>>>>>> @@ -1115,7 +1121,10 @@ netdev_afxdp_batch_send(struct netdev *netdev, int qid,
>>>>>>      struct netdev_linux *dev;
>>>>>>      int ret;
>>>>>>
>>>>>> -    if (concurrent_txq) {
>>>>>> +    /* Lock is required when mixing afxdp pmd and nonpmd mode.
>>>>>> +     * ex: one device is created 'afxdp', the other is 'afxdp-nonpmd'.
>>>>>> +     */
>>>>>> +    if (concurrent_txq || (nonpmd_cnt != 0)) {
>>>>>
>>>>> I'm confused about this change.  Are you expecting same device being
>>>>> opened twice with different netdev type?  Database should not allow this.
>>>>
>>>> Not the same device.
>>>>
>>>> For example, dev1 opened as 'afxdp', and dev2 opened as 'afxdp-nonpmd'.
>>>> When dev2 receives a packet and send to dev1, the main thread used by dev2
>>>> is calling the send(), while it is possible that dev2's pmd thread is
>>>> also calling
>>>> send(). So need a lock here.
>>>
>>> But they will send to different tx queues.
>>
>> OK,  I think you are talking about the XPS feature (dynamic txqs).
>> I thought XPS can only work when all between pmd threads.
>> So adding a lock here.
>> The patch currently doesn't work without the lock. Let me investigate more.
>>
> More details.
> On my test using veth (only have 1 rxq 1 txq)
> Somehow the main thread is assigned to use queue id  = 2, which causes segfault.

That is very strange...
Could you, please, share your test setup?  I'll try to reproduce.

Best regards, Ilya Maximets.
William Tu March 4, 2020, 6:09 p.m. UTC | #7
On Fri, Feb 28, 2020 at 12:12 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> On 2/27/20 8:52 PM, William Tu wrote:
> > On Thu, Feb 27, 2020 at 11:10 AM William Tu <u9012063@gmail.com> wrote:
> >>
> >> On Thu, Feb 27, 2020 at 9:54 AM Ilya Maximets <i.maximets@ovn.org> wrote:
> >>>
> >>> On 2/27/20 6:51 PM, William Tu wrote:
> >>>> On Thu, Feb 27, 2020 at 9:42 AM Ilya Maximets <i.maximets@ovn.org> wrote:
> >>>>>
> >>>>> On 2/27/20 6:30 PM, William Tu wrote:
> >>>>>> The patch adds a new netdev class 'afxdp-nonpmd' to enable afxdp
> >>>>>> interrupt mode. This is similar to 'type=afxdp', except that the
> >>>>>> is_pmd field is set to false. As a result, the packet processing
> >>>>>> is handled by main thread, not pmd thread. This avoids burning
> >>>>>> the CPU to always 100% when there is no traffic.
> >>>>>>
> >>>>>> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/655885506
> >>>>>> Signed-off-by: William Tu <u9012063@gmail.com>
> >>>>>> ---
> >>>>>>  NEWS                  |  3 +++
> >>>>>>  lib/netdev-afxdp.c    | 20 +++++++++++++++++++-
> >>>>>>  lib/netdev-linux.c    | 20 ++++++++++++++++++++
> >>>>>>  lib/netdev-provider.h |  1 +
> >>>>>>  lib/netdev.c          |  1 +
> >>>>>>  tests/system-afxdp.at | 23 +++++++++++++++++++++++
> >>>>>>  6 files changed, 67 insertions(+), 1 deletion(-)
> >>>>>>
> >>>>>> diff --git a/NEWS b/NEWS
> >>>>>> index f62ef1f47ea8..594c55dc11d6 100644
> >>>>>> --- a/NEWS
> >>>>>> +++ b/NEWS
> >>>>>> @@ -4,6 +4,9 @@ Post-v2.13.0
> >>>>>>     - OpenFlow:
> >>>>>>       * The OpenFlow ofp_desc/serial_num may now be configured by setting the
> >>>>>>         value of other-config:dp-sn in the Bridge table.
> >>>>>> +   - AF_XDP:
> >>>>>> +     * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles
> >>>>>> +       by enabling interrupt mode.
> >>>>>>
> >>>>>>
> >>>>>>  v2.13.0 - 14 Feb 2020
> >>>>>> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
> >>>>>> index 482400d8d135..cd2c7c381139 100644
> >>>>>> --- a/lib/netdev-afxdp.c
> >>>>>> +++ b/lib/netdev-afxdp.c
> >>>>>> @@ -169,6 +169,12 @@ struct netdev_afxdp_tx_lock {
> >>>>>>      );
> >>>>>>  };
> >>>>>>
> >>>>>> +static int nonpmd_cnt;  /* Number of afxdp netdevs in non-pmd mode. */
> >>>>>> +static bool
> >>>>>> +netdev_is_afxdp_nonpmd(struct netdev *netdev) {
> >>>>>> +    return netdev_get_class(netdev) == &netdev_afxdp_nonpmd_class;
> >>>>>> +}
> >>>>>> +
> >>>>>>  #ifdef HAVE_XDP_NEED_WAKEUP
> >>>>>>  static inline void
> >>>>>>  xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem,
> >>>>>> @@ -1115,7 +1121,10 @@ netdev_afxdp_batch_send(struct netdev *netdev, int qid,
> >>>>>>      struct netdev_linux *dev;
> >>>>>>      int ret;
> >>>>>>
> >>>>>> -    if (concurrent_txq) {
> >>>>>> +    /* Lock is required when mixing afxdp pmd and nonpmd mode.
> >>>>>> +     * ex: one device is created 'afxdp', the other is 'afxdp-nonpmd'.
> >>>>>> +     */
> >>>>>> +    if (concurrent_txq || (nonpmd_cnt != 0)) {
> >>>>>
> >>>>> I'm confused about this change.  Are you expecting same device being
> >>>>> opened twice with different netdev type?  Database should not allow this.
> >>>>
> >>>> Not the same device.
> >>>>
> >>>> For example, dev1 opened as 'afxdp', and dev2 opened as 'afxdp-nonpmd'.
> >>>> When dev2 receives a packet and send to dev1, the main thread used by dev2
> >>>> is calling the send(), while it is possible that dev2's pmd thread is
> >>>> also calling
> >>>> send(). So need a lock here.
> >>>
> >>> But they will send to different tx queues.
> >>
> >> OK,  I think you are talking about the XPS feature (dynamic txqs).
> >> I thought XPS can only work when all between pmd threads.
> >> So adding a lock here.
> >> The patch currently doesn't work without the lock. Let me investigate more.
> >>
> > More details.
> > On my test using veth (only have 1 rxq 1 txq)
> > Somehow the main thread is assigned to use queue id  = 2, which causes segfault.
>
> That is very strange...
> Could you, please, share your test setup?  I'll try to reproduce.
>
> Best regards, Ilya Maximets.

Sorry, somehow I miss your reply in my mailbox.
I simply tested it using two namespaces, one device as afxdp, the
other as afxdp-nonpmd.
---
# start ovs-vswitchd and ovsdb-server

ovs-vsctl -- add-br br0 -- set Bridge br0 datapath_type=netdev
ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0xf

ip netns add at_ns0
ip link add p0 type veth peer name afxdp-p0
ip link set p0 netns at_ns0
ip link set dev afxdp-p0 up
ovs-vsctl add-port br0 afxdp-p0
ovs-vsctl -- set interface afxdp-p0 options:n_rxq=1
type="afxdp-nonpmd" options:xdp-mode=generic

ip netns exec at_ns0 sh << NS_EXEC_HEREDOC
ip addr add "10.1.1.1/24" dev p0
ip link set dev p0 up
NS_EXEC_HEREDOC

ip netns add at_ns1
ip link add p1 type veth peer name afxdp-p1
ip link set p1 netns at_ns1
ip link set dev afxdp-p1 up
ovs-vsctl add-port br0 afxdp-p1 -- \
               set interface afxdp-p1 type="afxdp"
options:xdp-mode=generic options:n_rxq=1

ip netns exec at_ns1 sh << NS_EXEC_HEREDOC
ip addr add "10.1.1.2/24" dev p1
ip link set dev p1 up
NS_EXEC_HEREDOC

ip netns exec at_ns0 ping  -c 10 -i .2 10.1.1.2
---

It's pretty easy to reproduce the issue, ping will crash the ovs-vswitchd.

Regards,
William
Ilya Maximets March 6, 2020, 4:11 p.m. UTC | #8
On 3/4/20 7:09 PM, William Tu wrote:
> On Fri, Feb 28, 2020 at 12:12 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>>
>> On 2/27/20 8:52 PM, William Tu wrote:
>>> On Thu, Feb 27, 2020 at 11:10 AM William Tu <u9012063@gmail.com> wrote:
>>>>
>>>> On Thu, Feb 27, 2020 at 9:54 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>>>>>
>>>>> On 2/27/20 6:51 PM, William Tu wrote:
>>>>>> On Thu, Feb 27, 2020 at 9:42 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>>>>>>>
>>>>>>> On 2/27/20 6:30 PM, William Tu wrote:
>>>>>>>> The patch adds a new netdev class 'afxdp-nonpmd' to enable afxdp
>>>>>>>> interrupt mode. This is similar to 'type=afxdp', except that the
>>>>>>>> is_pmd field is set to false. As a result, the packet processing
>>>>>>>> is handled by main thread, not pmd thread. This avoids burning
>>>>>>>> the CPU to always 100% when there is no traffic.
>>>>>>>>
>>>>>>>> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/655885506
>>>>>>>> Signed-off-by: William Tu <u9012063@gmail.com>
>>>>>>>> ---
>>>>>>>>  NEWS                  |  3 +++
>>>>>>>>  lib/netdev-afxdp.c    | 20 +++++++++++++++++++-
>>>>>>>>  lib/netdev-linux.c    | 20 ++++++++++++++++++++
>>>>>>>>  lib/netdev-provider.h |  1 +
>>>>>>>>  lib/netdev.c          |  1 +
>>>>>>>>  tests/system-afxdp.at | 23 +++++++++++++++++++++++
>>>>>>>>  6 files changed, 67 insertions(+), 1 deletion(-)
>>>>>>>>
>>>>>>>> diff --git a/NEWS b/NEWS
>>>>>>>> index f62ef1f47ea8..594c55dc11d6 100644
>>>>>>>> --- a/NEWS
>>>>>>>> +++ b/NEWS
>>>>>>>> @@ -4,6 +4,9 @@ Post-v2.13.0
>>>>>>>>     - OpenFlow:
>>>>>>>>       * The OpenFlow ofp_desc/serial_num may now be configured by setting the
>>>>>>>>         value of other-config:dp-sn in the Bridge table.
>>>>>>>> +   - AF_XDP:
>>>>>>>> +     * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles
>>>>>>>> +       by enabling interrupt mode.
>>>>>>>>
>>>>>>>>
>>>>>>>>  v2.13.0 - 14 Feb 2020
>>>>>>>> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
>>>>>>>> index 482400d8d135..cd2c7c381139 100644
>>>>>>>> --- a/lib/netdev-afxdp.c
>>>>>>>> +++ b/lib/netdev-afxdp.c
>>>>>>>> @@ -169,6 +169,12 @@ struct netdev_afxdp_tx_lock {
>>>>>>>>      );
>>>>>>>>  };
>>>>>>>>
>>>>>>>> +static int nonpmd_cnt;  /* Number of afxdp netdevs in non-pmd mode. */
>>>>>>>> +static bool
>>>>>>>> +netdev_is_afxdp_nonpmd(struct netdev *netdev) {
>>>>>>>> +    return netdev_get_class(netdev) == &netdev_afxdp_nonpmd_class;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>  #ifdef HAVE_XDP_NEED_WAKEUP
>>>>>>>>  static inline void
>>>>>>>>  xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem,
>>>>>>>> @@ -1115,7 +1121,10 @@ netdev_afxdp_batch_send(struct netdev *netdev, int qid,
>>>>>>>>      struct netdev_linux *dev;
>>>>>>>>      int ret;
>>>>>>>>
>>>>>>>> -    if (concurrent_txq) {
>>>>>>>> +    /* Lock is required when mixing afxdp pmd and nonpmd mode.
>>>>>>>> +     * ex: one device is created 'afxdp', the other is 'afxdp-nonpmd'.
>>>>>>>> +     */
>>>>>>>> +    if (concurrent_txq || (nonpmd_cnt != 0)) {
>>>>>>>
>>>>>>> I'm confused about this change.  Are you expecting same device being
>>>>>>> opened twice with different netdev type?  Database should not allow this.
>>>>>>
>>>>>> Not the same device.
>>>>>>
>>>>>> For example, dev1 opened as 'afxdp', and dev2 opened as 'afxdp-nonpmd'.
>>>>>> When dev2 receives a packet and send to dev1, the main thread used by dev2
>>>>>> is calling the send(), while it is possible that dev2's pmd thread is
>>>>>> also calling
>>>>>> send(). So need a lock here.
>>>>>
>>>>> But they will send to different tx queues.
>>>>
>>>> OK,  I think you are talking about the XPS feature (dynamic txqs).
>>>> I thought XPS can only work when all between pmd threads.
>>>> So adding a lock here.
>>>> The patch currently doesn't work without the lock. Let me investigate more.
>>>>
>>> More details.
>>> On my test using veth (only have 1 rxq 1 txq)
>>> Somehow the main thread is assigned to use queue id  = 2, which causes segfault.
>>
>> That is very strange...
>> Could you, please, share your test setup?  I'll try to reproduce.
>>
>> Best regards, Ilya Maximets.
> 
> Sorry, somehow I miss your reply in my mailbox.
> I simply tested it using two namespaces, one device as afxdp, the
> other as afxdp-nonpmd.
> ---
> # start ovs-vswitchd and ovsdb-server
> 
> ovs-vsctl -- add-br br0 -- set Bridge br0 datapath_type=netdev
> ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0xf
> 
> ip netns add at_ns0
> ip link add p0 type veth peer name afxdp-p0
> ip link set p0 netns at_ns0
> ip link set dev afxdp-p0 up
> ovs-vsctl add-port br0 afxdp-p0
> ovs-vsctl -- set interface afxdp-p0 options:n_rxq=1
> type="afxdp-nonpmd" options:xdp-mode=generic
> 
> ip netns exec at_ns0 sh << NS_EXEC_HEREDOC
> ip addr add "10.1.1.1/24" dev p0
> ip link set dev p0 up
> NS_EXEC_HEREDOC
> 
> ip netns add at_ns1
> ip link add p1 type veth peer name afxdp-p1
> ip link set p1 netns at_ns1
> ip link set dev afxdp-p1 up
> ovs-vsctl add-port br0 afxdp-p1 -- \
>                set interface afxdp-p1 type="afxdp"
> options:xdp-mode=generic options:n_rxq=1
> 
> ip netns exec at_ns1 sh << NS_EXEC_HEREDOC
> ip addr add "10.1.1.2/24" dev p1
> ip link set dev p1 up
> NS_EXEC_HEREDOC
> 
> ip netns exec at_ns0 ping  -c 10 -i .2 10.1.1.2
> ---
> 
> It's pretty easy to reproduce the issue, ping will crash the ovs-vswitchd.

Thanks.  I'll try to reproduce on next week.

Best regards, Ilya Maximets.
Ilya Maximets March 24, 2020, 11:53 p.m. UTC | #9
On 3/6/20 5:11 PM, Ilya Maximets wrote:
> On 3/4/20 7:09 PM, William Tu wrote:
>> On Fri, Feb 28, 2020 at 12:12 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>>>
>>> On 2/27/20 8:52 PM, William Tu wrote:
>>>> On Thu, Feb 27, 2020 at 11:10 AM William Tu <u9012063@gmail.com> wrote:
>>>>>
>>>>> On Thu, Feb 27, 2020 at 9:54 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>>>>>>
>>>>>> On 2/27/20 6:51 PM, William Tu wrote:
>>>>>>> On Thu, Feb 27, 2020 at 9:42 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>>>>>>>>
>>>>>>>> On 2/27/20 6:30 PM, William Tu wrote:
>>>>>>>>> The patch adds a new netdev class 'afxdp-nonpmd' to enable afxdp
>>>>>>>>> interrupt mode. This is similar to 'type=afxdp', except that the
>>>>>>>>> is_pmd field is set to false. As a result, the packet processing
>>>>>>>>> is handled by main thread, not pmd thread. This avoids burning
>>>>>>>>> the CPU to always 100% when there is no traffic.
>>>>>>>>>
>>>>>>>>> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/655885506
>>>>>>>>> Signed-off-by: William Tu <u9012063@gmail.com>
>>>>>>>>> ---
>>>>>>>>>  NEWS                  |  3 +++
>>>>>>>>>  lib/netdev-afxdp.c    | 20 +++++++++++++++++++-
>>>>>>>>>  lib/netdev-linux.c    | 20 ++++++++++++++++++++
>>>>>>>>>  lib/netdev-provider.h |  1 +
>>>>>>>>>  lib/netdev.c          |  1 +
>>>>>>>>>  tests/system-afxdp.at | 23 +++++++++++++++++++++++
>>>>>>>>>  6 files changed, 67 insertions(+), 1 deletion(-)
>>>>>>>>>
>>>>>>>>> diff --git a/NEWS b/NEWS
>>>>>>>>> index f62ef1f47ea8..594c55dc11d6 100644
>>>>>>>>> --- a/NEWS
>>>>>>>>> +++ b/NEWS
>>>>>>>>> @@ -4,6 +4,9 @@ Post-v2.13.0
>>>>>>>>>     - OpenFlow:
>>>>>>>>>       * The OpenFlow ofp_desc/serial_num may now be configured by setting the
>>>>>>>>>         value of other-config:dp-sn in the Bridge table.
>>>>>>>>> +   - AF_XDP:
>>>>>>>>> +     * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles
>>>>>>>>> +       by enabling interrupt mode.
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>  v2.13.0 - 14 Feb 2020
>>>>>>>>> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
>>>>>>>>> index 482400d8d135..cd2c7c381139 100644
>>>>>>>>> --- a/lib/netdev-afxdp.c
>>>>>>>>> +++ b/lib/netdev-afxdp.c
>>>>>>>>> @@ -169,6 +169,12 @@ struct netdev_afxdp_tx_lock {
>>>>>>>>>      );
>>>>>>>>>  };
>>>>>>>>>
>>>>>>>>> +static int nonpmd_cnt;  /* Number of afxdp netdevs in non-pmd mode. */
>>>>>>>>> +static bool
>>>>>>>>> +netdev_is_afxdp_nonpmd(struct netdev *netdev) {
>>>>>>>>> +    return netdev_get_class(netdev) == &netdev_afxdp_nonpmd_class;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>>  #ifdef HAVE_XDP_NEED_WAKEUP
>>>>>>>>>  static inline void
>>>>>>>>>  xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem,
>>>>>>>>> @@ -1115,7 +1121,10 @@ netdev_afxdp_batch_send(struct netdev *netdev, int qid,
>>>>>>>>>      struct netdev_linux *dev;
>>>>>>>>>      int ret;
>>>>>>>>>
>>>>>>>>> -    if (concurrent_txq) {
>>>>>>>>> +    /* Lock is required when mixing afxdp pmd and nonpmd mode.
>>>>>>>>> +     * ex: one device is created 'afxdp', the other is 'afxdp-nonpmd'.
>>>>>>>>> +     */
>>>>>>>>> +    if (concurrent_txq || (nonpmd_cnt != 0)) {
>>>>>>>>
>>>>>>>> I'm confused about this change.  Are you expecting same device being
>>>>>>>> opened twice with different netdev type?  Database should not allow this.
>>>>>>>
>>>>>>> Not the same device.
>>>>>>>
>>>>>>> For example, dev1 opened as 'afxdp', and dev2 opened as 'afxdp-nonpmd'.
>>>>>>> When dev2 receives a packet and send to dev1, the main thread used by dev2
>>>>>>> is calling the send(), while it is possible that dev2's pmd thread is
>>>>>>> also calling
>>>>>>> send(). So need a lock here.
>>>>>>
>>>>>> But they will send to different tx queues.
>>>>>
>>>>> OK,  I think you are talking about the XPS feature (dynamic txqs).
>>>>> I thought XPS can only work when all between pmd threads.
>>>>> So adding a lock here.
>>>>> The patch currently doesn't work without the lock. Let me investigate more.
>>>>>
>>>> More details.
>>>> On my test using veth (only have 1 rxq 1 txq)
>>>> Somehow the main thread is assigned to use queue id  = 2, which causes segfault.
>>>
>>> That is very strange...
>>> Could you, please, share your test setup?  I'll try to reproduce.
>>>
>>> Best regards, Ilya Maximets.
>>
>> Sorry, somehow I miss your reply in my mailbox.
>> I simply tested it using two namespaces, one device as afxdp, the
>> other as afxdp-nonpmd.
>> ---
>> # start ovs-vswitchd and ovsdb-server
>>
>> ovs-vsctl -- add-br br0 -- set Bridge br0 datapath_type=netdev
>> ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0xf
>>
>> ip netns add at_ns0
>> ip link add p0 type veth peer name afxdp-p0
>> ip link set p0 netns at_ns0
>> ip link set dev afxdp-p0 up
>> ovs-vsctl add-port br0 afxdp-p0
>> ovs-vsctl -- set interface afxdp-p0 options:n_rxq=1
>> type="afxdp-nonpmd" options:xdp-mode=generic
>>
>> ip netns exec at_ns0 sh << NS_EXEC_HEREDOC
>> ip addr add "10.1.1.1/24" dev p0
>> ip link set dev p0 up
>> NS_EXEC_HEREDOC
>>
>> ip netns add at_ns1
>> ip link add p1 type veth peer name afxdp-p1
>> ip link set p1 netns at_ns1
>> ip link set dev afxdp-p1 up
>> ovs-vsctl add-port br0 afxdp-p1 -- \
>>                set interface afxdp-p1 type="afxdp"
>> options:xdp-mode=generic options:n_rxq=1
>>
>> ip netns exec at_ns1 sh << NS_EXEC_HEREDOC
>> ip addr add "10.1.1.2/24" dev p1
>> ip link set dev p1 up
>> NS_EXEC_HEREDOC
>>
>> ip netns exec at_ns0 ping  -c 10 -i .2 10.1.1.2
>> ---
>>
>> It's pretty easy to reproduce the issue, ping will crash the ovs-vswitchd.
> 
> Thanks.  I'll try to reproduce on next week.

It took a bit longer, but I reproduced the issue and sent a fix here:
https://patchwork.ozlabs.org/patch/1261072/

Best regards, Ilya Maximets.
William Tu March 25, 2020, 2:38 p.m. UTC | #10
On Wed, Mar 25, 2020 at 12:53:18AM +0100, Ilya Maximets wrote:
> On 3/6/20 5:11 PM, Ilya Maximets wrote:
> > On 3/4/20 7:09 PM, William Tu wrote:
> >> On Fri, Feb 28, 2020 at 12:12 AM Ilya Maximets <i.maximets@ovn.org> wrote:
> >>>
> >>> On 2/27/20 8:52 PM, William Tu wrote:
> >>>> On Thu, Feb 27, 2020 at 11:10 AM William Tu <u9012063@gmail.com> wrote:
> >>>>>
> >>>>> On Thu, Feb 27, 2020 at 9:54 AM Ilya Maximets <i.maximets@ovn.org> wrote:
> >>>>>>
> >>>>>> On 2/27/20 6:51 PM, William Tu wrote:
> >>>>>>> On Thu, Feb 27, 2020 at 9:42 AM Ilya Maximets <i.maximets@ovn.org> wrote:
> >>>>>>>>
> >>>>>>>> On 2/27/20 6:30 PM, William Tu wrote:
> >>>>>>>>> The patch adds a new netdev class 'afxdp-nonpmd' to enable afxdp
> >>>>>>>>> interrupt mode. This is similar to 'type=afxdp', except that the
> >>>>>>>>> is_pmd field is set to false. As a result, the packet processing
> >>>>>>>>> is handled by main thread, not pmd thread. This avoids burning
> >>>>>>>>> the CPU to always 100% when there is no traffic.
> >>>>>>>>>
> >>>>>>>>> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/655885506
> >>>>>>>>> Signed-off-by: William Tu <u9012063@gmail.com>
> >>>>>>>>> ---
> >>>>>>>>>  NEWS                  |  3 +++
> >>>>>>>>>  lib/netdev-afxdp.c    | 20 +++++++++++++++++++-
> >>>>>>>>>  lib/netdev-linux.c    | 20 ++++++++++++++++++++
> >>>>>>>>>  lib/netdev-provider.h |  1 +
> >>>>>>>>>  lib/netdev.c          |  1 +
> >>>>>>>>>  tests/system-afxdp.at | 23 +++++++++++++++++++++++
> >>>>>>>>>  6 files changed, 67 insertions(+), 1 deletion(-)
> >>>>>>>>>
> >>>>>>>>> diff --git a/NEWS b/NEWS
> >>>>>>>>> index f62ef1f47ea8..594c55dc11d6 100644
> >>>>>>>>> --- a/NEWS
> >>>>>>>>> +++ b/NEWS
> >>>>>>>>> @@ -4,6 +4,9 @@ Post-v2.13.0
> >>>>>>>>>     - OpenFlow:
> >>>>>>>>>       * The OpenFlow ofp_desc/serial_num may now be configured by setting the
> >>>>>>>>>         value of other-config:dp-sn in the Bridge table.
> >>>>>>>>> +   - AF_XDP:
> >>>>>>>>> +     * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles
> >>>>>>>>> +       by enabling interrupt mode.
> >>>>>>>>>
> >>>>>>>>>
> >>>>>>>>>  v2.13.0 - 14 Feb 2020
> >>>>>>>>> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
> >>>>>>>>> index 482400d8d135..cd2c7c381139 100644
> >>>>>>>>> --- a/lib/netdev-afxdp.c
> >>>>>>>>> +++ b/lib/netdev-afxdp.c
> >>>>>>>>> @@ -169,6 +169,12 @@ struct netdev_afxdp_tx_lock {
> >>>>>>>>>      );
> >>>>>>>>>  };
> >>>>>>>>>
> >>>>>>>>> +static int nonpmd_cnt;  /* Number of afxdp netdevs in non-pmd mode. */
> >>>>>>>>> +static bool
> >>>>>>>>> +netdev_is_afxdp_nonpmd(struct netdev *netdev) {
> >>>>>>>>> +    return netdev_get_class(netdev) == &netdev_afxdp_nonpmd_class;
> >>>>>>>>> +}
> >>>>>>>>> +
> >>>>>>>>>  #ifdef HAVE_XDP_NEED_WAKEUP
> >>>>>>>>>  static inline void
> >>>>>>>>>  xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem,
> >>>>>>>>> @@ -1115,7 +1121,10 @@ netdev_afxdp_batch_send(struct netdev *netdev, int qid,
> >>>>>>>>>      struct netdev_linux *dev;
> >>>>>>>>>      int ret;
> >>>>>>>>>
> >>>>>>>>> -    if (concurrent_txq) {
> >>>>>>>>> +    /* Lock is required when mixing afxdp pmd and nonpmd mode.
> >>>>>>>>> +     * ex: one device is created 'afxdp', the other is 'afxdp-nonpmd'.
> >>>>>>>>> +     */
> >>>>>>>>> +    if (concurrent_txq || (nonpmd_cnt != 0)) {
> >>>>>>>>
> >>>>>>>> I'm confused about this change.  Are you expecting same device being
> >>>>>>>> opened twice with different netdev type?  Database should not allow this.
> >>>>>>>
> >>>>>>> Not the same device.
> >>>>>>>
> >>>>>>> For example, dev1 opened as 'afxdp', and dev2 opened as 'afxdp-nonpmd'.
> >>>>>>> When dev2 receives a packet and send to dev1, the main thread used by dev2
> >>>>>>> is calling the send(), while it is possible that dev2's pmd thread is
> >>>>>>> also calling
> >>>>>>> send(). So need a lock here.
> >>>>>>
> >>>>>> But they will send to different tx queues.
> >>>>>
> >>>>> OK,  I think you are talking about the XPS feature (dynamic txqs).
> >>>>> I thought XPS can only work when all between pmd threads.
> >>>>> So adding a lock here.
> >>>>> The patch currently doesn't work without the lock. Let me investigate more.
> >>>>>
> >>>> More details.
> >>>> On my test using veth (only have 1 rxq 1 txq)
> >>>> Somehow the main thread is assigned to use queue id  = 2, which causes segfault.
> >>>
> >>> That is very strange...
> >>> Could you, please, share your test setup?  I'll try to reproduce.
> >>>
> >>> Best regards, Ilya Maximets.
> >>
> >> Sorry, somehow I miss your reply in my mailbox.
> >> I simply tested it using two namespaces, one device as afxdp, the
> >> other as afxdp-nonpmd.
> >> ---
> >> # start ovs-vswitchd and ovsdb-server
> >>
> >> ovs-vsctl -- add-br br0 -- set Bridge br0 datapath_type=netdev
> >> ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0xf
> >>
> >> ip netns add at_ns0
> >> ip link add p0 type veth peer name afxdp-p0
> >> ip link set p0 netns at_ns0
> >> ip link set dev afxdp-p0 up
> >> ovs-vsctl add-port br0 afxdp-p0
> >> ovs-vsctl -- set interface afxdp-p0 options:n_rxq=1
> >> type="afxdp-nonpmd" options:xdp-mode=generic
> >>
> >> ip netns exec at_ns0 sh << NS_EXEC_HEREDOC
> >> ip addr add "10.1.1.1/24" dev p0
> >> ip link set dev p0 up
> >> NS_EXEC_HEREDOC
> >>
> >> ip netns add at_ns1
> >> ip link add p1 type veth peer name afxdp-p1
> >> ip link set p1 netns at_ns1
> >> ip link set dev afxdp-p1 up
> >> ovs-vsctl add-port br0 afxdp-p1 -- \
> >>                set interface afxdp-p1 type="afxdp"
> >> options:xdp-mode=generic options:n_rxq=1
> >>
> >> ip netns exec at_ns1 sh << NS_EXEC_HEREDOC
> >> ip addr add "10.1.1.2/24" dev p1
> >> ip link set dev p1 up
> >> NS_EXEC_HEREDOC
> >>
> >> ip netns exec at_ns0 ping  -c 10 -i .2 10.1.1.2
> >> ---
> >>
> >> It's pretty easy to reproduce the issue, ping will crash the ovs-vswitchd.
> > 
> > Thanks.  I'll try to reproduce on next week.
> 
> It took a bit longer, but I reproduced the issue and sent a fix here:
> https://patchwork.ozlabs.org/patch/1261072/
> 
> Best regards, Ilya Maximets.
Thanks a lot!
William
diff mbox series

Patch

diff --git a/NEWS b/NEWS
index f62ef1f47ea8..594c55dc11d6 100644
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,9 @@  Post-v2.13.0
    - OpenFlow:
      * The OpenFlow ofp_desc/serial_num may now be configured by setting the
        value of other-config:dp-sn in the Bridge table.
+   - AF_XDP:
+     * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles
+       by enabling interrupt mode.
 
 
 v2.13.0 - 14 Feb 2020
diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
index 482400d8d135..cd2c7c381139 100644
--- a/lib/netdev-afxdp.c
+++ b/lib/netdev-afxdp.c
@@ -169,6 +169,12 @@  struct netdev_afxdp_tx_lock {
     );
 };
 
+static int nonpmd_cnt;  /* Number of afxdp netdevs in non-pmd mode. */
+static bool
+netdev_is_afxdp_nonpmd(struct netdev *netdev) {
+    return netdev_get_class(netdev) == &netdev_afxdp_nonpmd_class;
+}
+
 #ifdef HAVE_XDP_NEED_WAKEUP
 static inline void
 xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem,
@@ -1115,7 +1121,10 @@  netdev_afxdp_batch_send(struct netdev *netdev, int qid,
     struct netdev_linux *dev;
     int ret;
 
-    if (concurrent_txq) {
+    /* Lock is required when mixing afxdp pmd and nonpmd mode.
+     * ex: one device is created 'afxdp', the other is 'afxdp-nonpmd'.
+     */
+    if (concurrent_txq || (nonpmd_cnt != 0)) {
         dev = netdev_linux_cast(netdev);
         qid = qid % netdev_n_txq(netdev);
 
@@ -1159,6 +1168,7 @@  libbpf_print(enum libbpf_print_level level,
 int netdev_afxdp_init(void)
 {
     libbpf_set_print(libbpf_print);
+    nonpmd_cnt = 0;
     return 0;
 }
 
@@ -1188,6 +1198,10 @@  netdev_afxdp_construct(struct netdev *netdev)
     dev->tx_locks = NULL;
 
     netdev_request_reconfigure(netdev);
+
+    if (netdev_is_afxdp_nonpmd(netdev)) {
+        nonpmd_cnt++;
+    }
     return 0;
 }
 
@@ -1208,6 +1222,10 @@  netdev_afxdp_destruct(struct netdev *netdev)
 
     xsk_destroy_all(netdev);
     ovs_mutex_destroy(&dev->mutex);
+
+    if (netdev_is_afxdp_nonpmd(netdev)) {
+        nonpmd_cnt--;
+    }
 }
 
 int
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index c6f3d27409b6..75b3b55d3cad 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -3605,6 +3605,26 @@  const struct netdev_class netdev_afxdp_class = {
     .rxq_destruct = netdev_afxdp_rxq_destruct,
     .rxq_recv = netdev_afxdp_rxq_recv,
 };
+
+const struct netdev_class netdev_afxdp_nonpmd_class = {
+    NETDEV_LINUX_CLASS_COMMON,
+    .type = "afxdp-nonpmd",
+    .is_pmd = false,
+    .init = netdev_afxdp_init,
+    .construct = netdev_afxdp_construct,
+    .destruct = netdev_afxdp_destruct,
+    .get_stats = netdev_afxdp_get_stats,
+    .get_custom_stats = netdev_afxdp_get_custom_stats,
+    .get_status = netdev_linux_get_status,
+    .set_config = netdev_afxdp_set_config,
+    .get_config = netdev_afxdp_get_config,
+    .reconfigure = netdev_afxdp_reconfigure,
+    .get_numa_id = netdev_linux_get_numa_id,
+    .send = netdev_afxdp_batch_send,
+    .rxq_construct = netdev_afxdp_rxq_construct,
+    .rxq_destruct = netdev_afxdp_rxq_destruct,
+    .rxq_recv = netdev_afxdp_rxq_recv,
+};
 #endif
 
 
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index 22f4cde3337a..06c1d98a0b07 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -848,6 +848,7 @@  extern const struct netdev_class netdev_tap_class;
 
 #ifdef HAVE_AF_XDP
 extern const struct netdev_class netdev_afxdp_class;
+extern const struct netdev_class netdev_afxdp_nonpmd_class;
 #endif
 #ifdef  __cplusplus
 }
diff --git a/lib/netdev.c b/lib/netdev.c
index f95b19af4da0..6d9723ebc3c1 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -154,6 +154,7 @@  netdev_initialize(void)
         netdev_register_flow_api_provider(&netdev_offload_tc);
 #ifdef HAVE_AF_XDP
         netdev_register_provider(&netdev_afxdp_class);
+        netdev_register_provider(&netdev_afxdp_nonpmd_class);
 #endif
 #endif
 #if defined(__FreeBSD__) || defined(__NetBSD__)
diff --git a/tests/system-afxdp.at b/tests/system-afxdp.at
index e4451624f882..0d09906fb6c8 100644
--- a/tests/system-afxdp.at
+++ b/tests/system-afxdp.at
@@ -22,3 +22,26 @@  AT_CHECK([grep "ovs-p0: could not set configuration" ovs-vswitchd.log | wc -l],
 OVS_TRAFFIC_VSWITCHD_STOP(["/ovs-p0: Too big 'n_rxq'/d
 /ovs-p0: could not set configuration/d"])
 AT_CLEANUP
+
+
+AT_SETUP([AF_XDP - ping between pmd and non-pmd ports])
+AT_KEYWORDS([afxdp nonpmd])
+OVS_TRAFFIC_VSWITCHD_START()
+
+AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+
+AT_CHECK([ovs-vsctl del-port ovs-p0])
+AT_CHECK([ovs-vsctl add-port br0 ovs-p0 -- \
+                    set interface ovs-p0 type=afxdp-nonpmd options:n_rxq=1],
+         [0], [], [stderr])
+
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP