diff mbox series

[nf-next] netfilter: nf_conntrack: speed up reads from nf_conntrack proc file

Message ID 20250211130313.31433-1-fw@strlen.de
State Changes Requested, archived
Headers show
Series [nf-next] netfilter: nf_conntrack: speed up reads from nf_conntrack proc file | expand

Commit Message

Florian Westphal Feb. 11, 2025, 1:03 p.m. UTC
Dumping all conntrack entries via proc interface can take hours due to
linear search to skip entries dumped so far in each cycle.

Apply same strategy used to speed up ipvs proc reading done in
commit 178883fd039d ("ipvs: speed up reads from ip_vs_conn proc file")
to nf_conntrack.

Note that the ctnetlink interface doesn't suffer from this problem.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/netfilter/nf_conntrack_standalone.c | 73 +++++++++++++------------
 1 file changed, 38 insertions(+), 35 deletions(-)

Comments

Pablo Neira Ayuso March 12, 2025, 4:54 p.m. UTC | #1
Hi Florian,

a few comments below.

On Tue, Feb 11, 2025 at 02:03:06PM +0100, Florian Westphal wrote:
> Dumping all conntrack entries via proc interface can take hours due to
> linear search to skip entries dumped so far in each cycle.
> 
> Apply same strategy used to speed up ipvs proc reading done in
> commit 178883fd039d ("ipvs: speed up reads from ip_vs_conn proc file")
> to nf_conntrack.
> 
> Note that the ctnetlink interface doesn't suffer from this problem.
> 
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
>  net/netfilter/nf_conntrack_standalone.c | 73 +++++++++++++------------
>  1 file changed, 38 insertions(+), 35 deletions(-)
> 
> diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
> index 502cf10aab41..2a79e690470a 100644
> --- a/net/netfilter/nf_conntrack_standalone.c
> +++ b/net/netfilter/nf_conntrack_standalone.c
> @@ -98,51 +98,34 @@ struct ct_iter_state {
>  	struct seq_net_private p;
>  	struct hlist_nulls_head *hash;
>  	unsigned int htable_size;
> +	unsigned int skip_elems;
>  	unsigned int bucket;
>  	u_int64_t time_now;
>  };
>  
> -static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
> +static struct nf_conntrack_tuple_hash *ct_get_next(struct ct_iter_state *st)
>  {
> -	struct ct_iter_state *st = seq->private;
> +	struct nf_conntrack_tuple_hash *h;
>  	struct hlist_nulls_node *n;
> +	unsigned int i;
>  
> -	for (st->bucket = 0;
> -	     st->bucket < st->htable_size;
> -	     st->bucket++) {
> -		n = rcu_dereference(
> -			hlist_nulls_first_rcu(&st->hash[st->bucket]));
> -		if (!is_a_nulls(n))
> -			return n;
> -	}
> -	return NULL;
> -}
> +	for (i = st->bucket; i < st->htable_size; i++) {
> +		unsigned int skip = 0;
>  
> -static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
> -				      struct hlist_nulls_node *head)
> -{
> -	struct ct_iter_state *st = seq->private;
> +		hlist_nulls_for_each_entry(h, n, &st->hash[i], hnnode) {

                hlist_nulls_for_each_entry_rcu ?

> +			if (skip >= st->skip_elems) {
> +				st->bucket = i;
> +				return h;
> +			}
>  
> -	head = rcu_dereference(hlist_nulls_next_rcu(head));
> -	while (is_a_nulls(head)) {
> -		if (likely(get_nulls_value(head) == st->bucket)) {
> -			if (++st->bucket >= st->htable_size)
> -				return NULL;
> +			++skip;
>  		}
> -		head = rcu_dereference(
> -			hlist_nulls_first_rcu(&st->hash[st->bucket]));

This does not rewind if (get_nulls_value(head) != st->bucket);
is that not needed anymore?

> -	}
> -	return head;
> -}
>  
> -static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
> -{
> -	struct hlist_nulls_node *head = ct_get_first(seq);
> +		st->skip_elems = 0;
> +	}
>  
> -	if (head)
> -		while (pos && (head = ct_get_next(seq, head)))
> -			pos--;
> -	return pos ? NULL : head;
> +	st->bucket = i;
> +	return NULL;
>  }
>  
>  static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
> @@ -154,13 +137,33 @@ static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
>  	rcu_read_lock();
>  
>  	nf_conntrack_get_ht(&st->hash, &st->htable_size);
> -	return ct_get_idx(seq, *pos);
> +
> +	if (*pos == 0) {
> +		st->skip_elems = 0;
> +		st->bucket = 0;
> +	}
> +
> +	return ct_get_next(st);
>  }
>  
>  static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
>  {
> +	struct nf_conntrack_tuple_hash *h = v;
> +	struct ct_iter_state *st = s->private;
> +	struct hlist_nulls_node *n;
> +
>  	(*pos)++;
> -	return ct_get_next(s, v);
> +
> +	/* more on same hash chain? */
> +	n = rcu_dereference(hlist_nulls_next_rcu(&h->hnnode));
> +	if (n && !is_a_nulls(n)) {
> +		st->skip_elems++;
> +		return hlist_nulls_entry(n, struct nf_conntrack_tuple_hash, hnnode);
> +	}
> +
> +	st->skip_elems = 0;
> +	st->bucket++;
> +	return ct_get_next(st);
>  }
>  
>  static void ct_seq_stop(struct seq_file *s, void *v)
> -- 
> 2.45.3
> 
>
Florian Westphal March 12, 2025, 6:28 p.m. UTC | #2
Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > -	struct ct_iter_state *st = seq->private;
> > +		hlist_nulls_for_each_entry(h, n, &st->hash[i], hnnode) {
> 
>                 hlist_nulls_for_each_entry_rcu ?

Yes.

> > -		if (likely(get_nulls_value(head) == st->bucket)) {
> > -			if (++st->bucket >= st->htable_size)
> > -				return NULL;
> > +			++skip;
> >  		}
> > -		head = rcu_dereference(
> > -			hlist_nulls_first_rcu(&st->hash[st->bucket]));
> 
> This does not rewind if get_nulls_value(head) != st->bucket),
> not needed anymore?

There are only two choices:
1. rewind and (possibly) dump entries more than once
2. skip to next and miss an entry

I'm not sure what's worse/better.
Pablo Neira Ayuso March 12, 2025, 7:45 p.m. UTC | #3
On Wed, Mar 12, 2025 at 07:28:38PM +0100, Florian Westphal wrote:
> Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > > -	struct ct_iter_state *st = seq->private;
> > > +		hlist_nulls_for_each_entry(h, n, &st->hash[i], hnnode) {
> > 
> >                 hlist_nulls_for_each_entry_rcu ?
> 
> Yes.
> 
> > > -		if (likely(get_nulls_value(head) == st->bucket)) {
> > > -			if (++st->bucket >= st->htable_size)
> > > -				return NULL;
> > > +			++skip;
> > >  		}
> > > -		head = rcu_dereference(
> > > -			hlist_nulls_first_rcu(&st->hash[st->bucket]));
> > 
> > This does not rewind if get_nulls_value(head) != st->bucket),
> > not needed anymore?
> 
> There are only two choices:
> 1. rewind and (possibly) dump entries more than once
> 2. skip to next and miss an entry

I think we can still display duplicates in 2. too, since the nulls check
is what tells us whether the iteration finished on another bucket? Then
2. means skipped entries and duplicates.

> I'm not sure whats worse/better.

Skipping looks simpler, it is less code. But if entries are duplicated
then userspace has a chance to deduplicate?

I am not sure how many entries could be skipped without the nulls
check in practice, TBH.

If you prefer to keep it simple and skip entries, I suggest adding this
to the patch description; this is a change in behaviour that is worth
documenting IMO.

Thanks.
Florian Westphal March 12, 2025, 9:39 p.m. UTC | #4
Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > 2. skip to next and miss an entry
> 
> I think we can still display duplicates in 2. too since nulls check if
> the iteration finished on another bucket? Then 2. means skipped
> entries and duplicates.

Yes, but it's not a change in behaviour; this was never a stable walk.

> > I'm not sure whats worse/better.
> 
> Skipping looks simpler, it is less code. But if entries are duplicated
> then userspace has a chance to deduplicate?

Let's just give up and nuke the entire functionality, i.e. /proc
interface removal.
diff mbox series

Patch

diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 502cf10aab41..2a79e690470a 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -98,51 +98,34 @@  struct ct_iter_state {
 	struct seq_net_private p;
 	struct hlist_nulls_head *hash;
 	unsigned int htable_size;
+	unsigned int skip_elems;
 	unsigned int bucket;
 	u_int64_t time_now;
 };
 
-static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
+static struct nf_conntrack_tuple_hash *ct_get_next(struct ct_iter_state *st)
 {
-	struct ct_iter_state *st = seq->private;
+	struct nf_conntrack_tuple_hash *h;
 	struct hlist_nulls_node *n;
+	unsigned int i;
 
-	for (st->bucket = 0;
-	     st->bucket < st->htable_size;
-	     st->bucket++) {
-		n = rcu_dereference(
-			hlist_nulls_first_rcu(&st->hash[st->bucket]));
-		if (!is_a_nulls(n))
-			return n;
-	}
-	return NULL;
-}
+	for (i = st->bucket; i < st->htable_size; i++) {
+		unsigned int skip = 0;
 
-static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
-				      struct hlist_nulls_node *head)
-{
-	struct ct_iter_state *st = seq->private;
+		hlist_nulls_for_each_entry(h, n, &st->hash[i], hnnode) {
+			if (skip >= st->skip_elems) {
+				st->bucket = i;
+				return h;
+			}
 
-	head = rcu_dereference(hlist_nulls_next_rcu(head));
-	while (is_a_nulls(head)) {
-		if (likely(get_nulls_value(head) == st->bucket)) {
-			if (++st->bucket >= st->htable_size)
-				return NULL;
+			++skip;
 		}
-		head = rcu_dereference(
-			hlist_nulls_first_rcu(&st->hash[st->bucket]));
-	}
-	return head;
-}
 
-static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
-{
-	struct hlist_nulls_node *head = ct_get_first(seq);
+		st->skip_elems = 0;
+	}
 
-	if (head)
-		while (pos && (head = ct_get_next(seq, head)))
-			pos--;
-	return pos ? NULL : head;
+	st->bucket = i;
+	return NULL;
 }
 
 static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
@@ -154,13 +137,33 @@  static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
 	rcu_read_lock();
 
 	nf_conntrack_get_ht(&st->hash, &st->htable_size);
-	return ct_get_idx(seq, *pos);
+
+	if (*pos == 0) {
+		st->skip_elems = 0;
+		st->bucket = 0;
+	}
+
+	return ct_get_next(st);
 }
 
 static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
 {
+	struct nf_conntrack_tuple_hash *h = v;
+	struct ct_iter_state *st = s->private;
+	struct hlist_nulls_node *n;
+
 	(*pos)++;
-	return ct_get_next(s, v);
+
+	/* more on same hash chain? */
+	n = rcu_dereference(hlist_nulls_next_rcu(&h->hnnode));
+	if (n && !is_a_nulls(n)) {
+		st->skip_elems++;
+		return hlist_nulls_entry(n, struct nf_conntrack_tuple_hash, hnnode);
+	}
+
+	st->skip_elems = 0;
+	st->bucket++;
+	return ct_get_next(st);
 }
 
 static void ct_seq_stop(struct seq_file *s, void *v)