Message ID | 20250211130313.31433-1-fw@strlen.de |
---|---|
State | Changes Requested, archived |
Series | [nf-next] netfilter: nf_conntrack: speed up reads from nf_conntrack proc file |
Hi Florian, a few comments below.

On Tue, Feb 11, 2025 at 02:03:06PM +0100, Florian Westphal wrote:
> Dumping all conntrack entries via proc interface can take hours due to
> linear search to skip entries dumped so far in each cycle.
>
> Apply same strategy used to speed up ipvs proc reading done in
> commit 178883fd039d ("ipvs: speed up reads from ip_vs_conn proc file")
> to nf_conntrack.
>
> Note that the ctnetlink interface doesn't suffer from this problem.
>
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
>  net/netfilter/nf_conntrack_standalone.c | 73 +++++++++++++------------
>  1 file changed, 38 insertions(+), 35 deletions(-)
>
> diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
> index 502cf10aab41..2a79e690470a 100644
> --- a/net/netfilter/nf_conntrack_standalone.c
> +++ b/net/netfilter/nf_conntrack_standalone.c
> @@ -98,51 +98,34 @@ struct ct_iter_state {
>  	struct seq_net_private p;
>  	struct hlist_nulls_head *hash;
>  	unsigned int htable_size;
> +	unsigned int skip_elems;
>  	unsigned int bucket;
>  	u_int64_t time_now;
>  };
>
> -static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
> +static struct nf_conntrack_tuple_hash *ct_get_next(struct ct_iter_state *st)
>  {
> -	struct ct_iter_state *st = seq->private;
> +	struct nf_conntrack_tuple_hash *h;
>  	struct hlist_nulls_node *n;
> +	unsigned int i;
>
> -	for (st->bucket = 0;
> -	     st->bucket < st->htable_size;
> -	     st->bucket++) {
> -		n = rcu_dereference(
> -			hlist_nulls_first_rcu(&st->hash[st->bucket]));
> -		if (!is_a_nulls(n))
> -			return n;
> -	}
> -	return NULL;
> -}
> +	for (i = st->bucket; i < st->htable_size; i++) {
> +		unsigned int skip = 0;
>
> -static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
> -				      struct hlist_nulls_node *head)
> -{
> -	struct ct_iter_state *st = seq->private;
> +		hlist_nulls_for_each_entry(h, n, &st->hash[i], hnnode) {

hlist_nulls_for_each_entry_rcu ?

> +			if (skip >= st->skip_elems) {
> +				st->bucket = i;
> +				return h;
> +			}
>
> -	head = rcu_dereference(hlist_nulls_next_rcu(head));
> -	while (is_a_nulls(head)) {
> -		if (likely(get_nulls_value(head) == st->bucket)) {
> -			if (++st->bucket >= st->htable_size)
> -				return NULL;
> +			++skip;
>  		}
> -		head = rcu_dereference(
> -			hlist_nulls_first_rcu(&st->hash[st->bucket]));

This does not rewind if get_nulls_value(head) != st->bucket),
not needed anymore?

> -	}
> -	return head;
> -}
>
> -static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
> -{
> -	struct hlist_nulls_node *head = ct_get_first(seq);
> +		st->skip_elems = 0;
> +	}
>
> -	if (head)
> -		while (pos && (head = ct_get_next(seq, head)))
> -			pos--;
> -	return pos ? NULL : head;
> +	st->bucket = i;
> +	return NULL;
>  }
>
>  static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
> @@ -154,13 +137,33 @@ static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
>  	rcu_read_lock();
>
>  	nf_conntrack_get_ht(&st->hash, &st->htable_size);
> -	return ct_get_idx(seq, *pos);
> +
> +	if (*pos == 0) {
> +		st->skip_elems = 0;
> +		st->bucket = 0;
> +	}
> +
> +	return ct_get_next(st);
>  }
>
>  static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
>  {
> +	struct nf_conntrack_tuple_hash *h = v;
> +	struct ct_iter_state *st = s->private;
> +	struct hlist_nulls_node *n;
> +
>  	(*pos)++;
> -	return ct_get_next(s, v);
> +
> +	/* more on same hash chain? */
> +	n = rcu_dereference(hlist_nulls_next_rcu(&h->hnnode));
> +	if (n && !is_a_nulls(n)) {
> +		st->skip_elems++;
> +		return hlist_nulls_entry(n, struct nf_conntrack_tuple_hash, hnnode);
> +	}
> +
> +	st->skip_elems = 0;
> +	st->bucket++;
> +	return ct_get_next(st);
>  }
>
>  static void ct_seq_stop(struct seq_file *s, void *v)
> --
> 2.45.3
>
>
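The speedup in the patch above comes from turning the seq_file cursor into a resumable position: the old ct_seq_start() re-walked the table from bucket 0 and counted off *pos entries on every read() cycle, which is roughly quadratic over a full dump, while the new code continues from the remembered (bucket, skip_elems) pair. A minimal userspace sketch of that resume idea follows; it is not kernel code, and the toy_table/toy_cursor/toy_next names are invented purely for illustration.

/*
 * Simplified userspace sketch of the resume strategy (not kernel code).
 * toy_table, toy_cursor and toy_next are invented names; the patch keeps
 * the equivalent state in struct ct_iter_state (bucket, skip_elems) and
 * resumes the walk in ct_get_next().
 */
#include <stdio.h>
#include <stdlib.h>

#define TOY_BUCKETS 4

struct toy_entry {
	int id;
	struct toy_entry *next;
};

struct toy_table {
	struct toy_entry *bucket[TOY_BUCKETS];
};

struct toy_cursor {
	unsigned int bucket;     /* chain the previous call stopped in       */
	unsigned int skip_elems; /* entries of that chain already handed out */
};

/* Resume from the saved cursor instead of recounting from bucket 0. */
static struct toy_entry *toy_next(struct toy_table *t, struct toy_cursor *c)
{
	for (unsigned int i = c->bucket; i < TOY_BUCKETS; i++) {
		unsigned int skip = 0;

		for (struct toy_entry *e = t->bucket[i]; e; e = e->next) {
			if (skip >= c->skip_elems) {
				c->bucket = i;
				c->skip_elems = skip + 1;
				return e;
			}
			skip++;
		}
		/* chain exhausted: restart the skip count in the next bucket */
		c->skip_elems = 0;
	}
	c->bucket = TOY_BUCKETS;
	return NULL;
}

int main(void)
{
	struct toy_table t = { { NULL } };
	struct toy_cursor c = { 0, 0 };

	/* populate a few chains */
	for (int i = 0; i < 10; i++) {
		struct toy_entry *e = malloc(sizeof(*e));

		e->id = i;
		e->next = t.bucket[i % TOY_BUCKETS];
		t.bucket[i % TOY_BUCKETS] = e;
	}

	/* each call costs at most one chain walk, not a re-walk of every
	 * entry dumped so far, which is what made the old proc read slow
	 */
	for (struct toy_entry *e = toy_next(&t, &c); e; e = toy_next(&t, &c))
		printf("entry %d (bucket %u)\n", e->id, c.bucket);

	return 0;
}

The trade-off, debated in the replies below, is that skip_elems only counts entries within one chain, so concurrent insertions or removals in that chain can make the walk skip or repeat entries.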
Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > -	struct ct_iter_state *st = seq->private;
> > +		hlist_nulls_for_each_entry(h, n, &st->hash[i], hnnode) {
>
> hlist_nulls_for_each_entry_rcu ?

Yes.

> > -		if (likely(get_nulls_value(head) == st->bucket)) {
> > -			if (++st->bucket >= st->htable_size)
> > -				return NULL;
> > +			++skip;
> >  		}
> > -		head = rcu_dereference(
> > -			hlist_nulls_first_rcu(&st->hash[st->bucket]));
>
> This does not rewind if get_nulls_value(head) != st->bucket),
> not needed anymore?

There are only two choices:
1. rewind and (possibly) dump entries more than once
2. skip to next and miss an entry

I'm not sure what's worse/better.
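For context on the rewind being discussed: hlist_nulls chains do not end in NULL but in a "nulls" marker that encodes the bucket number (the real helpers are is_a_nulls() and get_nulls_value() in include/linux/list_nulls.h), so a lockless reader whose current entry was moved to a different chain under it can detect that it finished in the wrong bucket and, in the old code, restart the current bucket from its head. Below is a rough userspace sketch of the marker idea; toy_node, make_nulls and friends are invented names, and no RCU is involved.

/*
 * Userspace sketch of the "nulls" end-marker idea (simplified, no RCU,
 * invented toy_* names). The chain terminator is an odd "pointer" that
 * encodes a bucket number, so a reader can tell whether the chain it
 * finished on is the one it started in.
 */
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_node {
	struct toy_node *next;
	int data;
};

/* end marker carrying the bucket id, tagged with bit 0 */
static struct toy_node *make_nulls(unsigned int bucket)
{
	return (struct toy_node *)(((uintptr_t)bucket << 1) | 1UL);
}

static bool toy_is_a_nulls(const struct toy_node *p)
{
	return (uintptr_t)p & 1UL;
}

static unsigned int toy_nulls_value(const struct toy_node *p)
{
	return (uintptr_t)p >> 1;
}

int main(void)
{
	/* pretend the reader started in bucket 3, but the entry it holds
	 * was moved onto the chain of bucket 5 while it was walking
	 */
	struct toy_node a = { make_nulls(5), 42 };
	unsigned int walked_bucket = 3;
	const struct toy_node *p = &a;

	while (!toy_is_a_nulls(p)) {
		printf("entry %d\n", p->data);
		p = p->next;
	}

	if (toy_nulls_value(p) == walked_bucket)
		printf("ended on the bucket we started in, advance\n");
	else
		printf("ended on another chain; the old code rewound bucket %u\n",
		       walked_bucket);

	return 0;
}

The new iterator drops that check entirely, which is exactly the duplicate-vs-skip trade-off discussed in the follow-ups.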
On Wed, Mar 12, 2025 at 07:28:38PM +0100, Florian Westphal wrote:
> Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > > -	struct ct_iter_state *st = seq->private;
> > > +		hlist_nulls_for_each_entry(h, n, &st->hash[i], hnnode) {
> >
> > hlist_nulls_for_each_entry_rcu ?
>
> Yes.
>
> > > -		if (likely(get_nulls_value(head) == st->bucket)) {
> > > -			if (++st->bucket >= st->htable_size)
> > > -				return NULL;
> > > +			++skip;
> > >  		}
> > > -		head = rcu_dereference(
> > > -			hlist_nulls_first_rcu(&st->hash[st->bucket]));
> >
> > This does not rewind if get_nulls_value(head) != st->bucket),
> > not needed anymore?
>
> There are only two choices:
> 1. rewind and (possibly) dump entries more than once
> 2. skip to next and miss an entry

I think we can still display duplicates with 2. too, since the nulls check
is what tells whether the iteration finished on another bucket? Then 2.
means both skipped entries and duplicates.

> I'm not sure what's worse/better.

Skipping looks simpler, it is less code. But if entries are duplicated,
then userspace has a chance to deduplicate? I am not sure how many
entries could be skipped without the nulls check in practice, TBH.

If you prefer to keep it simple and skip entries, I suggest adding this
to the patch description; this is a change in behaviour that is worth
documenting, IMO.

Thanks.
Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > 2. skip to next and miss an entry
>
> I think we can still display duplicates with 2. too, since the nulls check
> is what tells whether the iteration finished on another bucket? Then 2.
> means both skipped entries and duplicates.

Yes, but it's not a change in behaviour, this was never a stable walk.

> > I'm not sure what's worse/better.
>
> Skipping looks simpler, it is less code. But if entries are duplicated,
> then userspace has a chance to deduplicate?

Let's just give up and nuke the entire functionality, i.e. /proc
interface removal.