diff mbox series

[RFC,net-next,v6,01/15] queue_api: define queue api

Message ID 20240305020153.2787423-2-almasrymina@google.com
State New
Headers show
Series Device Memory TCP | expand

Commit Message

Mina Almasry March 5, 2024, 2:01 a.m. UTC
This API enables the net stack to reset the queues used for devmem.

Signed-off-by: Mina Almasry <almasrymina@google.com>

---
 include/linux/netdevice.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

Comments

Jakub Kicinski March 8, 2024, 1:30 a.m. UTC | #1
On Mon,  4 Mar 2024 18:01:36 -0800 Mina Almasry wrote:
> + * void *(*ndo_queue_mem_alloc)(struct net_device *dev, int idx);
> + *	Allocate memory for an RX queue. The memory returned in the form of
> + *	a void * can be passed to ndo_queue_mem_free() for freeing or to
> + *	ndo_queue_start to create an RX queue with this memory.
> + *
> + * void	(*ndo_queue_mem_free)(struct net_device *dev, void *);
> + *	Free memory from an RX queue.
> + *
> + * int (*ndo_queue_start)(struct net_device *dev, int idx, void *);
> + *	Start an RX queue at the specified index.
> + *
> + * int (*ndo_queue_stop)(struct net_device *dev, int idx, void **);
> + *	Stop the RX queue at the specified index.
>   */
>  struct net_device_ops {
>  	int			(*ndo_init)(struct net_device *dev);
> @@ -1679,6 +1693,16 @@ struct net_device_ops {
>  	int			(*ndo_hwtstamp_set)(struct net_device *dev,
>  						    struct kernel_hwtstamp_config *kernel_config,
>  						    struct netlink_ext_ack *extack);
> +	void *			(*ndo_queue_mem_alloc)(struct net_device *dev,
> +						       int idx);
> +	void			(*ndo_queue_mem_free)(struct net_device *dev,
> +						      void *queue_mem);
> +	int			(*ndo_queue_start)(struct net_device *dev,
> +						   int idx,
> +						   void *queue_mem);
> +	int			(*ndo_queue_stop)(struct net_device *dev,
> +						  int idx,
> +						  void **out_queue_mem);

The queue configuration object was quite an integral part of the design,
I'm slightly worried that it's not here :) Also we may want to rename
the about-to-be-merged ops from netdev_stat_ops and netdev_queue_ops,
and add these there?

https://lore.kernel.org/all/20240306195509.1502746-2-kuba@kernel.org/

Very excited to hear that you made progress on this and ported GVE over!
Mina Almasry March 8, 2024, 2:08 a.m. UTC | #2
On Thu, Mar 7, 2024 at 5:30 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Mon,  4 Mar 2024 18:01:36 -0800 Mina Almasry wrote:
> > + * void *(*ndo_queue_mem_alloc)(struct net_device *dev, int idx);
> > + *   Allocate memory for an RX queue. The memory returned in the form of
> > + *   a void * can be passed to ndo_queue_mem_free() for freeing or to
> > + *   ndo_queue_start to create an RX queue with this memory.
> > + *
> > + * void      (*ndo_queue_mem_free)(struct net_device *dev, void *);
> > + *   Free memory from an RX queue.
> > + *
> > + * int (*ndo_queue_start)(struct net_device *dev, int idx, void *);
> > + *   Start an RX queue at the specified index.
> > + *
> > + * int (*ndo_queue_stop)(struct net_device *dev, int idx, void **);
> > + *   Stop the RX queue at the specified index.
> >   */
> >  struct net_device_ops {
> >       int                     (*ndo_init)(struct net_device *dev);
> > @@ -1679,6 +1693,16 @@ struct net_device_ops {
> >       int                     (*ndo_hwtstamp_set)(struct net_device *dev,
> >                                                   struct kernel_hwtstamp_config *kernel_config,
> >                                                   struct netlink_ext_ack *extack);
> > +     void *                  (*ndo_queue_mem_alloc)(struct net_device *dev,
> > +                                                    int idx);
> > +     void                    (*ndo_queue_mem_free)(struct net_device *dev,
> > +                                                   void *queue_mem);
> > +     int                     (*ndo_queue_start)(struct net_device *dev,
> > +                                                int idx,
> > +                                                void *queue_mem);
> > +     int                     (*ndo_queue_stop)(struct net_device *dev,
> > +                                               int idx,
> > +                                               void **out_queue_mem);
>
> The queue configuration object was quite an integral part of the design,
> I'm slightly worried that it's not here :)

That was a bit of a simplification I'm making since we just want to
restart the queue. I thought it was OK to define some minimal version
here and extend it later with configuration? Because in this context
all we really need is to restart the queue, yes?

If extending with some configuration is a must please let me know what
configuration struct you're envisioning. Were you envisioning a stub?
Or some real configuration struct that we just don't use at the
moment? Or one that we use for this use case somehow?

> Also we may want to rename
> the about-to-be-merged ops from netdev_stat_ops and netdev_queue_ops,
> and add these there?
>
> https://lore.kernel.org/all/20240306195509.1502746-2-kuba@kernel.org/
>

Yeah, that sounds reasonable! Thanks! We could also keep the
netdev_stat_ops and add new netdev_queue_ops alongside them if you
prefer.

> Very excited to hear that you made progress on this and ported GVE over!

Actually, we're still discussing but it looks like my GVE queue API
implementation I proposed earlier may be a no-go. Likely someone from
the GVE team will follow up here with this piece, probably in a
separate series.

For now I'm carrying my POC for the GVE implementation out of tree
with the rest of the driver changes:

https://github.com/mina/linux/commit/501b734c80186545281e9edb1bf313f5a2d8cbee
Jakub Kicinski March 8, 2024, 3:36 a.m. UTC | #3
On Thu, 7 Mar 2024 18:08:24 -0800 Mina Almasry wrote:
> On Thu, Mar 7, 2024 at 5:30 PM Jakub Kicinski <kuba@kernel.org> wrote:
> > On Mon,  4 Mar 2024 18:01:36 -0800 Mina Almasry wrote:  
> > > + * void *(*ndo_queue_mem_alloc)(struct net_device *dev, int idx);
> > > + *   Allocate memory for an RX queue. The memory returned in the form of
> > > + *   a void * can be passed to ndo_queue_mem_free() for freeing or to
> > > + *   ndo_queue_start to create an RX queue with this memory.
> > > + *
> > > + * void      (*ndo_queue_mem_free)(struct net_device *dev, void *);
> > > + *   Free memory from an RX queue.
> > > + *
> > > + * int (*ndo_queue_start)(struct net_device *dev, int idx, void *);
> > > + *   Start an RX queue at the specified index.
> > > + *
> > > + * int (*ndo_queue_stop)(struct net_device *dev, int idx, void **);
> > > + *   Stop the RX queue at the specified index.
> > >   */
> > >  struct net_device_ops {
> > >       int                     (*ndo_init)(struct net_device *dev);
> > > @@ -1679,6 +1693,16 @@ struct net_device_ops {
> > >       int                     (*ndo_hwtstamp_set)(struct net_device *dev,
> > >                                                   struct kernel_hwtstamp_config *kernel_config,
> > >                                                   struct netlink_ext_ack *extack);
> > > +     void *                  (*ndo_queue_mem_alloc)(struct net_device *dev,
> > > +                                                    int idx);
> > > +     void                    (*ndo_queue_mem_free)(struct net_device *dev,
> > > +                                                   void *queue_mem);
> > > +     int                     (*ndo_queue_start)(struct net_device *dev,
> > > +                                                int idx,
> > > +                                                void *queue_mem);
> > > +     int                     (*ndo_queue_stop)(struct net_device *dev,
> > > +                                               int idx,
> > > +                                               void **out_queue_mem);  
> >
> > The queue configuration object was quite an integral part of the design,
> > I'm slightly worried that it's not here :)  
> 
> That was a bit of a simplification I'm making since we just want to
> restart the queue. I thought it was OK to define some minimal version
> here and extend it later with configuration? Because in this context
> all we really need is to restart the queue, yes?

Right, I think it's perfectly fine for the time being.
It works, and is internal to the kernel.

> If extending with some configuration is a must please let me know what
> configuration struct you're envisioning. Were you envisioning a stub?
> Or some real configuration struct that we just don't use at the
> moment? Or one that we use for this use case somehow?

I had some ideas about storing the configuration as rules,
instead of directly in struct netdev_rx_queue.
E.g. default queue length = 2000, but for select queues you may
want a different length.
But application binding to a queue would always take precedence, 
so even if the ideas ever materialize there will be no uAPI change.

> > Also we may want to rename
> > the about-to-be-merged ops from netdev_stat_ops and netdev_queue_ops,
> > and add these there?
> >
> > https://lore.kernel.org/all/20240306195509.1502746-2-kuba@kernel.org/
> 
> Yeah, that sounds reasonable! Thanks! We could also keep the
> netdev_stat_ops and add new netdev_queue_ops alongside them if you
> prefer.

Up to you, after some soul searching we renamed the uAPI to call these
stats qstats, I just forgot to rename the op struct. But it doesn't
matter much.

> > Very excited to hear that you made progress on this and ported GVE over!  
> 
> Actually, we're still discussing but it looks like my GVE queue API
> implementation I proposed earlier may be a no-go. Likely someone from
> the GVE team will follow up here with this piece, probably in a
> separate series.

Well, it's going to be ready when it's ready :)
Speaking of things which can be merged independently,
feel free to post patch 3, maybe it can make v6.9..

> For now I'm carrying my POC for the GVE implementation out of tree
> with the rest of the driver changes:
> 
> https://github.com/mina/linux/commit/501b734c80186545281e9edb1bf313f5a2d8cbee
David Wei March 8, 2024, 11:47 p.m. UTC | #4
On 2024-03-04 18:01, Mina Almasry wrote:
> This API enables the net stack to reset the queues used for devmem.
> 
> Signed-off-by: Mina Almasry <almasrymina@google.com>
> 
> ---
>  include/linux/netdevice.h | 24 ++++++++++++++++++++++++
>  1 file changed, 24 insertions(+)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index c41019f34179..3105c586355d 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1435,6 +1435,20 @@ struct netdev_net_notifier {
>   *			   struct kernel_hwtstamp_config *kernel_config,
>   *			   struct netlink_ext_ack *extack);
>   *	Change the hardware timestamping parameters for NIC device.
> + *
> + * void *(*ndo_queue_mem_alloc)(struct net_device *dev, int idx);
> + *	Allocate memory for an RX queue. The memory returned in the form of
> + *	a void * can be passed to ndo_queue_mem_free() for freeing or to
> + *	ndo_queue_start to create an RX queue with this memory.
> + *
> + * void	(*ndo_queue_mem_free)(struct net_device *dev, void *);
> + *	Free memory from an RX queue.
> + *
> + * int (*ndo_queue_start)(struct net_device *dev, int idx, void *);
> + *	Start an RX queue at the specified index.
> + *
> + * int (*ndo_queue_stop)(struct net_device *dev, int idx, void **);
> + *	Stop the RX queue at the specified index.
>   */
>  struct net_device_ops {
>  	int			(*ndo_init)(struct net_device *dev);
> @@ -1679,6 +1693,16 @@ struct net_device_ops {
>  	int			(*ndo_hwtstamp_set)(struct net_device *dev,
>  						    struct kernel_hwtstamp_config *kernel_config,
>  						    struct netlink_ext_ack *extack);
> +	void *			(*ndo_queue_mem_alloc)(struct net_device *dev,
> +						       int idx);
> +	void			(*ndo_queue_mem_free)(struct net_device *dev,
> +						      void *queue_mem);
> +	int			(*ndo_queue_start)(struct net_device *dev,
> +						   int idx,
> +						   void *queue_mem);
> +	int			(*ndo_queue_stop)(struct net_device *dev,
> +						  int idx,
> +						  void **out_queue_mem);
>  };

I'm working to port bnxt over to using this API. What are your thoughts
on maybe pulling this out and use bnxt to drive it?

>  
>  /**
Mina Almasry March 9, 2024, 12:27 a.m. UTC | #5
On Fri, Mar 8, 2024 at 3:48 PM David Wei <dw@davidwei.uk> wrote:
>
> On 2024-03-04 18:01, Mina Almasry wrote:
> > This API enables the net stack to reset the queues used for devmem.
> >
> > Signed-off-by: Mina Almasry <almasrymina@google.com>
> >
> > ---
> >  include/linux/netdevice.h | 24 ++++++++++++++++++++++++
> >  1 file changed, 24 insertions(+)
> >
> > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> > index c41019f34179..3105c586355d 100644
> > --- a/include/linux/netdevice.h
> > +++ b/include/linux/netdevice.h
> > @@ -1435,6 +1435,20 @@ struct netdev_net_notifier {
> >   *                      struct kernel_hwtstamp_config *kernel_config,
> >   *                      struct netlink_ext_ack *extack);
> >   *   Change the hardware timestamping parameters for NIC device.
> > + *
> > + * void *(*ndo_queue_mem_alloc)(struct net_device *dev, int idx);
> > + *   Allocate memory for an RX queue. The memory returned in the form of
> > + *   a void * can be passed to ndo_queue_mem_free() for freeing or to
> > + *   ndo_queue_start to create an RX queue with this memory.
> > + *
> > + * void      (*ndo_queue_mem_free)(struct net_device *dev, void *);
> > + *   Free memory from an RX queue.
> > + *
> > + * int (*ndo_queue_start)(struct net_device *dev, int idx, void *);
> > + *   Start an RX queue at the specified index.
> > + *
> > + * int (*ndo_queue_stop)(struct net_device *dev, int idx, void **);
> > + *   Stop the RX queue at the specified index.
> >   */
> >  struct net_device_ops {
> >       int                     (*ndo_init)(struct net_device *dev);
> > @@ -1679,6 +1693,16 @@ struct net_device_ops {
> >       int                     (*ndo_hwtstamp_set)(struct net_device *dev,
> >                                                   struct kernel_hwtstamp_config *kernel_config,
> >                                                   struct netlink_ext_ack *extack);
> > +     void *                  (*ndo_queue_mem_alloc)(struct net_device *dev,
> > +                                                    int idx);
> > +     void                    (*ndo_queue_mem_free)(struct net_device *dev,
> > +                                                   void *queue_mem);
> > +     int                     (*ndo_queue_start)(struct net_device *dev,
> > +                                                int idx,
> > +                                                void *queue_mem);
> > +     int                     (*ndo_queue_stop)(struct net_device *dev,
> > +                                               int idx,
> > +                                               void **out_queue_mem);
> >  };
>
> I'm working to port bnxt over to using this API. What are your thoughts
> on maybe pulling this out and use bnxt to drive it?
>

Sure thing, go for it! Thanks!

I think we've going to have someone from GVE working on this in
parallel. I see no issue with us aligning on what the core-net ndos
would look like and implementing those in parallel for both drivers.
We're not currently planning to make any changes to the ndos besides
applying Jakub's feedback from this thread. If you find a need to
deviate from this, let us know and we'll work on staying in line with
that. Thanks!
David Ahern March 11, 2024, 1:12 a.m. UTC | #6
On 3/8/24 4:47 PM, David Wei wrote:
> 
> I'm working to port bnxt over to using this API. What are your thoughts
> on maybe pulling this out and use bnxt to drive it?
> 

I would love to see a second nic implementation; this patch set and
overall design is driven by GVE limitations.
diff mbox series

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c41019f34179..3105c586355d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1435,6 +1435,20 @@  struct netdev_net_notifier {
  *			   struct kernel_hwtstamp_config *kernel_config,
  *			   struct netlink_ext_ack *extack);
  *	Change the hardware timestamping parameters for NIC device.
+ *
+ * void *(*ndo_queue_mem_alloc)(struct net_device *dev, int idx);
+ *	Allocate memory for an RX queue. The memory returned in the form of
+ *	a void * can be passed to ndo_queue_mem_free() for freeing or to
+ *	ndo_queue_start to create an RX queue with this memory.
+ *
+ * void	(*ndo_queue_mem_free)(struct net_device *dev, void *);
+ *	Free memory from an RX queue.
+ *
+ * int (*ndo_queue_start)(struct net_device *dev, int idx, void *);
+ *	Start an RX queue at the specified index.
+ *
+ * int (*ndo_queue_stop)(struct net_device *dev, int idx, void **);
+ *	Stop the RX queue at the specified index.
  */
 struct net_device_ops {
 	int			(*ndo_init)(struct net_device *dev);
@@ -1679,6 +1693,16 @@  struct net_device_ops {
 	int			(*ndo_hwtstamp_set)(struct net_device *dev,
 						    struct kernel_hwtstamp_config *kernel_config,
 						    struct netlink_ext_ack *extack);
+	void *			(*ndo_queue_mem_alloc)(struct net_device *dev,
+						       int idx);
+	void			(*ndo_queue_mem_free)(struct net_device *dev,
+						      void *queue_mem);
+	int			(*ndo_queue_start)(struct net_device *dev,
+						   int idx,
+						   void *queue_mem);
+	int			(*ndo_queue_stop)(struct net_device *dev,
+						  int idx,
+						  void **out_queue_mem);
 };
 
 /**