af_packet: TPACKET_V3: replace busy-wait loop

Message ID: 20200707152204.10314-1-john.ogness@linutronix.de
State: Accepted
Delegated to: David Miller

Commit Message

John Ogness July 7, 2020, 3:22 p.m. UTC
A busy-wait loop is used to implement waiting for bits to be copied
from the skb to the kernel buffer before retiring a block. This is
a problem on PREEMPT_RT because the copying task could be preempted
by the busy-waiting task and thus live lock in the busy-wait loop.

Replace the busy-wait logic with an rwlock_t. This provides lockdep
coverage and makes the code RT ready.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
---
 patch against v5.8-rc4

 net/packet/af_packet.c | 20 ++++++++++----------
 net/packet/internal.h  |  2 +-
 2 files changed, 11 insertions(+), 11 deletions(-)
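
For orientation, here is a minimal sketch of the handoff pattern the patch
adopts -- simplified, invented names rather than the merged code (the real
lock lives in struct tpacket_kbdq_core):

	#include <linux/spinlock.h>

	/* Stand-in for tpacket_kbdq_core::blk_fill_in_prog_lock. */
	static DEFINE_RWLOCK(fill_lock);

	/* Fill path (per packet): hold the lock for reading while the
	 * copy runs, so any number of fills may proceed concurrently.
	 */
	static void fill_one_packet(void)
	{
		read_lock(&fill_lock);   /* was: atomic_inc(&blk_fill_in_prog) */
		/* ... skb_copy_bits() into the block ... */
		read_unlock(&fill_lock); /* was: atomic_dec(&blk_fill_in_prog) */
	}

	/* Retire path: take the lock for writing, then drop it at once.
	 * write_lock() cannot succeed until every reader has finished,
	 * so the empty critical section is purely a drain barrier. On
	 * PREEMPT_RT the writer sleeps rather than spins, so a fill
	 * task preempted by the retire task can still run to completion.
	 */
	static void retire_block(void)
	{
		write_lock(&fill_lock);
		write_unlock(&fill_lock);
		/* ... now safe to close the block ... */
	}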

Comments

Jakub Kicinski July 15, 2020, 8:21 p.m. UTC | #1
On Tue,  7 Jul 2020 17:28:04 +0206 John Ogness wrote:
> A busy-wait loop is used to implement waiting for bits to be copied
> from the skb to the kernel buffer before retiring a block. This is
> a problem on PREEMPT_RT because the copying task could be preempted
> by the busy-waiting task and thus live lock in the busy-wait loop.
> 
> Replace the busy-wait logic with an rwlock_t. This provides lockdep
> coverage and makes the code RT ready.
> 
> Signed-off-by: John Ogness <john.ogness@linutronix.de>

Is taking a lock and immediately releasing it better than a completion?
Seems like the lock is guaranteed to dirty a cache line, which would
otherwise be avoided here.

Willem, would you be able to take a look as well? Is this path
performance sensitive in real life?
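
For illustration, a completion-based variant of the wait -- the alternative
the question alludes to -- could look roughly like the sketch below. All
names are invented and this is not the merged code; it also assumes retire
paths are serialized and that no new fill can start on a block once its
retire has begun:

	#include <linux/atomic.h>
	#include <linux/completion.h>

	static atomic_t fills_in_prog = ATOMIC_INIT(0);
	static DECLARE_COMPLETION(fills_drained);

	static void fill_begin(void)
	{
		atomic_inc(&fills_in_prog);
	}

	static void fill_end(void)
	{
		/* The last filler out wakes a waiting retire path. */
		if (atomic_dec_and_test(&fills_in_prog))
			complete(&fills_drained);
	}

	static void wait_for_fills(void)
	{
		reinit_completion(&fills_drained);
		if (atomic_read(&fills_in_prog))
			wait_for_completion(&fills_drained);
	}

Such a waiter only reads when nothing is in flight, which is presumably the
cache-line point above. One wrinkle, though: wait_for_completion() sleeps,
and on a !RT kernel prb_retire_rx_blk_timer_expired() runs in softirq timer
context, where sleeping is not allowed -- so it could not be dropped in
as-is on that path.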

> [... quoted patch trimmed; the full diff appears under "Patch" below ...]
Willem de Bruijn July 15, 2020, 10:35 p.m. UTC | #2
On Wed, Jul 15, 2020 at 4:21 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Tue,  7 Jul 2020 17:28:04 +0206 John Ogness wrote:
> > A busy-wait loop is used to implement waiting for bits to be copied
> > from the skb to the kernel buffer before retiring a block. This is
> > a problem on PREEMPT_RT because the copying task could be preempted
> > by the busy-waiting task and thus live lock in the busy-wait loop.
> >
> > Replace the busy-wait logic with an rwlock_t. This provides lockdep
> > coverage and makes the code RT ready.
> >
> > Signed-off-by: John Ogness <john.ogness@linutronix.de>
>
> Is taking a lock and immediately releasing it better than a completion?
> Seems like the lock is guaranteed to dirty a cache line, which would
> otherwise be avoided here.
>
> Willem, would you be able to take a look as well? Is this path
> performance sensitive in real life?

No objections from me.

I guess this resolves the issue on preempt_rt, because the spinlocks act as
mutexes. It will still spin on write_lock otherwise, no huge difference from
existing logic.
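
Concretely, the split being described here, as annotated comments on the
two merged lines (the comments are this sketch's reading, not kernel text):

	/* !PREEMPT_RT: write_lock() spins until all in-flight readers
	 * are gone -- much like the old cpu_relax() loop, hence no huge
	 * difference, but now with lockdep coverage.
	 *
	 * PREEMPT_RT: rwlock_t becomes a sleeping lock, so the retire
	 * task blocks here and a preempted fill task can run, finish
	 * its copy, and release the lock -- the livelock is gone.
	 */
	write_lock(&pkc->blk_fill_in_prog_lock);
	write_unlock(&pkc->blk_fill_in_prog_lock);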

> > [... quoted patch trimmed; the full diff appears under "Patch" below ...]
Jakub Kicinski July 16, 2020, 12:22 a.m. UTC | #3
On Wed, 15 Jul 2020 18:35:00 -0400 Willem de Bruijn wrote:
> On Wed, Jul 15, 2020 at 4:21 PM Jakub Kicinski <kuba@kernel.org> wrote:
> >
> > On Tue,  7 Jul 2020 17:28:04 +0206 John Ogness wrote:  
> > > A busy-wait loop is used to implement waiting for bits to be copied
> > > from the skb to the kernel buffer before retiring a block. This is
> > > a problem on PREEMPT_RT because the copying task could be preempted
> > > by the busy-waiting task and thus live lock in the busy-wait loop.
> > >
> > > Replace the busy-wait logic with an rwlock_t. This provides lockdep
> > > coverage and makes the code RT ready.
> > >
> > > Signed-off-by: John Ogness <john.ogness@linutronix.de>  
> >
> > Is taking a lock and immediately releasing it better than a completion?
> > Seems like the lock is guaranteed to dirty a cache line, which would
> > otherwise be avoided here.
> >
> > Willem, would you be able to take a look as well? Is this path
> > performance sensitive in real life?  
> 
> No objections from me.
> 
> I guess this resolves the issue on preempt_rt, because the spinlocks act as
> mutexes. It will still spin on write_lock otherwise, no huge difference from
> existing logic.

Thanks!

If no one else objects I'm putting this in net-next.

Seems a little late for 5.8.

Patch

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 29bd405adbbd..dd1eec2dd6ef 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -593,6 +593,7 @@ static void init_prb_bdqc(struct packet_sock *po,
 						req_u->req3.tp_block_size);
 	p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
 	p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
+	rwlock_init(&p1->blk_fill_in_prog_lock);
 
 	p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
 	prb_init_ft_ops(p1, req_u);
@@ -659,10 +660,9 @@ static void prb_retire_rx_blk_timer_expired(struct timer_list *t)
 	 *
 	 */
 	if (BLOCK_NUM_PKTS(pbd)) {
-		while (atomic_read(&pkc->blk_fill_in_prog)) {
-			/* Waiting for skb_copy_bits to finish... */
-			cpu_relax();
-		}
+		/* Waiting for skb_copy_bits to finish... */
+		write_lock(&pkc->blk_fill_in_prog_lock);
+		write_unlock(&pkc->blk_fill_in_prog_lock);
 	}
 
 	if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
@@ -921,10 +921,9 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
 		 * the timer-handler already handled this case.
 		 */
 		if (!(status & TP_STATUS_BLK_TMO)) {
-			while (atomic_read(&pkc->blk_fill_in_prog)) {
-				/* Waiting for skb_copy_bits to finish... */
-				cpu_relax();
-			}
+			/* Waiting for skb_copy_bits to finish... */
+			write_lock(&pkc->blk_fill_in_prog_lock);
+			write_unlock(&pkc->blk_fill_in_prog_lock);
 		}
 		prb_close_block(pkc, pbd, po, status);
 		return;
@@ -944,7 +943,8 @@ static int prb_queue_frozen(struct tpacket_kbdq_core *pkc)
 static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb)
 {
 	struct tpacket_kbdq_core *pkc  = GET_PBDQC_FROM_RB(rb);
-	atomic_dec(&pkc->blk_fill_in_prog);
+
+	read_unlock(&pkc->blk_fill_in_prog_lock);
 }
 
 static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
@@ -998,7 +998,7 @@ static void prb_fill_curr_block(char *curr,
 	pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
 	BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
 	BLOCK_NUM_PKTS(pbd) += 1;
-	atomic_inc(&pkc->blk_fill_in_prog);
+	read_lock(&pkc->blk_fill_in_prog_lock);
 	prb_run_all_ft_ops(pkc, ppd);
 }
 
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 907f4cd2a718..fd41ecb7f605 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -39,7 +39,7 @@ struct tpacket_kbdq_core {
 	char		*nxt_offset;
 	struct sk_buff	*skb;
 
-	atomic_t	blk_fill_in_prog;
+	rwlock_t	blk_fill_in_prog_lock;
 
 	/* Default is set to 8ms */
 #define DEFAULT_PRB_RETIRE_TOV	(8)