diff mbox series

[bpf-next,7/8] samples/bpf: add sample program that periodically dumps TCP stats

Message ID 20190701204821.44230-8-sdf@google.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series bpf: TCP RTT sock_ops bpf callback | expand

Commit Message

Stanislav Fomichev July 1, 2019, 8:48 p.m. UTC
Uses new RTT callback to dump stats every second.

$ mkdir -p /tmp/cgroupv2
$ mount -t cgroup2 none /tmp/cgroupv2
$ mkdir -p /tmp/cgroupv2/foo
$ echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
$ bpftool prog load ./tcp_dumpstats_kern.o /sys/fs/bpf/tcp_prog
$ bpftool cgroup attach /tmp/cgroupv2/foo sock_ops pinned /sys/fs/bpf/tcp_prog
$ bpftool prog tracelog
$ # run neper/netperf/etc

Used neper to compare performance with and without this program attached
and didn't see any noticeable performance impact.

Sample output:
  <idle>-0     [015] ..s.  2074.128800: 0: dsack_dups=0 delivered=242526
  <idle>-0     [015] ..s.  2074.128808: 0: delivered_ce=0 icsk_retransmits=0
  <idle>-0     [015] ..s.  2075.130133: 0: dsack_dups=0 delivered=323599
  <idle>-0     [015] ..s.  2075.130138: 0: delivered_ce=0 icsk_retransmits=0
  <idle>-0     [005] .Ns.  2076.131440: 0: dsack_dups=0 delivered=404648
  <idle>-0     [005] .Ns.  2076.131447: 0: delivered_ce=0 icsk_retransmits=0

Cc: Eric Dumazet <edumazet@google.com>
Cc: Priyaranjan Jha <priyarjha@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 samples/bpf/Makefile             |  1 +
 samples/bpf/tcp_dumpstats_kern.c | 65 ++++++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+)
 create mode 100644 samples/bpf/tcp_dumpstats_kern.c

Comments

Y Song July 2, 2019, 12:15 a.m. UTC | #1
On Mon, Jul 1, 2019 at 1:49 PM Stanislav Fomichev <sdf@google.com> wrote:
>
> Uses new RTT callback to dump stats every second.
>
> $ mkdir -p /tmp/cgroupv2
> $ mount -t cgroup2 none /tmp/cgroupv2
> $ mkdir -p /tmp/cgroupv2/foo
> $ echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
> $ bpftool prog load ./tcp_dumpstats_kern.o /sys/fs/bpf/tcp_prog
> $ bpftool cgroup attach /tmp/cgroupv2/foo sock_ops pinned /sys/fs/bpf/tcp_prog
> $ bpftool prog tracelog
> $ # run neper/netperf/etc
>
> Used neper to compare performance with and without this program attached
> and didn't see any noticeable performance impact.
>
> Sample output:
>   <idle>-0     [015] ..s.  2074.128800: 0: dsack_dups=0 delivered=242526
>   <idle>-0     [015] ..s.  2074.128808: 0: delivered_ce=0 icsk_retransmits=0
>   <idle>-0     [015] ..s.  2075.130133: 0: dsack_dups=0 delivered=323599
>   <idle>-0     [015] ..s.  2075.130138: 0: delivered_ce=0 icsk_retransmits=0
>   <idle>-0     [005] .Ns.  2076.131440: 0: dsack_dups=0 delivered=404648
>   <idle>-0     [005] .Ns.  2076.131447: 0: delivered_ce=0 icsk_retransmits=0
>
> Cc: Eric Dumazet <edumazet@google.com>
> Cc: Priyaranjan Jha <priyarjha@google.com>
> Cc: Yuchung Cheng <ycheng@google.com>
> Cc: Soheil Hassas Yeganeh <soheil@google.com>
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---
>  samples/bpf/Makefile             |  1 +
>  samples/bpf/tcp_dumpstats_kern.c | 65 ++++++++++++++++++++++++++++++++
>  2 files changed, 66 insertions(+)
>  create mode 100644 samples/bpf/tcp_dumpstats_kern.c

Currently, only the bpf program goes into the repo. If we do not have another
script that uses this program for testing, the instructions in the commit
message should be added to the bpf program as comments, so people know what to
do with this file without going through the git commit message.

Is it possible to create a script to run with this bpf program?

>
> diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
> index 0917f8cf4fab..eaebbeead42f 100644
> --- a/samples/bpf/Makefile
> +++ b/samples/bpf/Makefile
> @@ -154,6 +154,7 @@ always += tcp_iw_kern.o
>  always += tcp_clamp_kern.o
>  always += tcp_basertt_kern.o
>  always += tcp_tos_reflect_kern.o
> +always += tcp_dumpstats_kern.o
>  always += xdp_redirect_kern.o
>  always += xdp_redirect_map_kern.o
>  always += xdp_redirect_cpu_kern.o
> diff --git a/samples/bpf/tcp_dumpstats_kern.c b/samples/bpf/tcp_dumpstats_kern.c
> new file mode 100644
> index 000000000000..5d22bf61db65
> --- /dev/null
> +++ b/samples/bpf/tcp_dumpstats_kern.c
> @@ -0,0 +1,65 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/bpf.h>
> +
> +#include "bpf_helpers.h"
> +#include "bpf_endian.h"
> +
> +#define INTERVAL                       1000000000ULL
> +
> +int _version SEC("version") = 1;
> +char _license[] SEC("license") = "GPL";
> +
> +struct {
> +       __u32 type;
> +       __u32 map_flags;
> +       int *key;
> +       __u64 *value;
> +} bpf_next_dump SEC(".maps") = {
> +       .type = BPF_MAP_TYPE_SK_STORAGE,
> +       .map_flags = BPF_F_NO_PREALLOC,
> +};
> +
> +SEC("sockops")
> +int _sockops(struct bpf_sock_ops *ctx)
> +{
> +       struct bpf_tcp_sock *tcp_sk;
> +       struct bpf_sock *sk;
> +       __u64 *next_dump;
> +       __u64 now;
> +
> +       switch (ctx->op) {
> +       case BPF_SOCK_OPS_TCP_CONNECT_CB:
> +               bpf_sock_ops_cb_flags_set(ctx, BPF_SOCK_OPS_RTT_CB_FLAG);
> +               return 1;
> +       case BPF_SOCK_OPS_RTT_CB:
> +               break;
> +       default:
> +               return 1;
> +       }
> +
> +       sk = ctx->sk;
> +       if (!sk)
> +               return 1;
> +
> +       next_dump = bpf_sk_storage_get(&bpf_next_dump, sk, 0,
> +                                      BPF_SK_STORAGE_GET_F_CREATE);
> +       if (!next_dump)
> +               return 1;
> +
> +       now = bpf_ktime_get_ns();
> +       if (now < *next_dump)
> +               return 1;
> +
> +       tcp_sk = bpf_tcp_sock(sk);
> +       if (!tcp_sk)
> +               return 1;
> +
> +       *next_dump = now + INTERVAL;
> +
> +       bpf_printk("dsack_dups=%u delivered=%u\n",
> +                  tcp_sk->dsack_dups, tcp_sk->delivered);
> +       bpf_printk("delivered_ce=%u icsk_retransmits=%u\n",
> +                  tcp_sk->delivered_ce, tcp_sk->icsk_retransmits);
> +
> +       return 1;
> +}
> --
> 2.22.0.410.gd8fdbe21b5-goog
>
Stanislav Fomichev July 2, 2019, 12:31 a.m. UTC | #2
On 07/01, Y Song wrote:
> On Mon, Jul 1, 2019 at 1:49 PM Stanislav Fomichev <sdf@google.com> wrote:
> >
> > Uses new RTT callback to dump stats every second.
> >
> > $ mkdir -p /tmp/cgroupv2
> > $ mount -t cgroup2 none /tmp/cgroupv2
> > $ mkdir -p /tmp/cgroupv2/foo
> > $ echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
> > $ bpftool prog load ./tcp_dumpstats_kern.o /sys/fs/bpf/tcp_prog
> > $ bpftool cgroup attach /tmp/cgroupv2/foo sock_ops pinned /sys/fs/bpf/tcp_prog
> > $ bpftool prog tracelog
> > $ # run neper/netperf/etc
> >
> > Used neper to compare performance with and without this program attached
> > and didn't see any noticeable performance impact.
> >
> > Sample output:
> >   <idle>-0     [015] ..s.  2074.128800: 0: dsack_dups=0 delivered=242526
> >   <idle>-0     [015] ..s.  2074.128808: 0: delivered_ce=0 icsk_retransmits=0
> >   <idle>-0     [015] ..s.  2075.130133: 0: dsack_dups=0 delivered=323599
> >   <idle>-0     [015] ..s.  2075.130138: 0: delivered_ce=0 icsk_retransmits=0
> >   <idle>-0     [005] .Ns.  2076.131440: 0: dsack_dups=0 delivered=404648
> >   <idle>-0     [005] .Ns.  2076.131447: 0: delivered_ce=0 icsk_retransmits=0
> >
> > Cc: Eric Dumazet <edumazet@google.com>
> > Cc: Priyaranjan Jha <priyarjha@google.com>
> > Cc: Yuchung Cheng <ycheng@google.com>
> > Cc: Soheil Hassas Yeganeh <soheil@google.com>
> > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > ---
> >  samples/bpf/Makefile             |  1 +
> >  samples/bpf/tcp_dumpstats_kern.c | 65 ++++++++++++++++++++++++++++++++
> >  2 files changed, 66 insertions(+)
> >  create mode 100644 samples/bpf/tcp_dumpstats_kern.c
> 
> Currently, only the bpf program goes into the repo. If we do not have another
> script that uses this program for testing, the instructions in the commit
> message should be added to the bpf program as comments, so people know what to
> do with this file without going through the git commit message.
> 
> Is it possible to create a script to run with this bpf program?
There are general instructions in samples/bpf/tcp_bpf.readme
with bpftool examples/etc. Should I just add a comment at the top
of the BPF program to point people to that .readme file instead?

> >
> > diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
> > index 0917f8cf4fab..eaebbeead42f 100644
> > --- a/samples/bpf/Makefile
> > +++ b/samples/bpf/Makefile
> > @@ -154,6 +154,7 @@ always += tcp_iw_kern.o
> >  always += tcp_clamp_kern.o
> >  always += tcp_basertt_kern.o
> >  always += tcp_tos_reflect_kern.o
> > +always += tcp_dumpstats_kern.o
> >  always += xdp_redirect_kern.o
> >  always += xdp_redirect_map_kern.o
> >  always += xdp_redirect_cpu_kern.o
> > diff --git a/samples/bpf/tcp_dumpstats_kern.c b/samples/bpf/tcp_dumpstats_kern.c
> > new file mode 100644
> > index 000000000000..5d22bf61db65
> > --- /dev/null
> > +++ b/samples/bpf/tcp_dumpstats_kern.c
> > @@ -0,0 +1,65 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +#include <linux/bpf.h>
> > +
> > +#include "bpf_helpers.h"
> > +#include "bpf_endian.h"
> > +
> > +#define INTERVAL                       1000000000ULL
> > +
> > +int _version SEC("version") = 1;
> > +char _license[] SEC("license") = "GPL";
> > +
> > +struct {
> > +       __u32 type;
> > +       __u32 map_flags;
> > +       int *key;
> > +       __u64 *value;
> > +} bpf_next_dump SEC(".maps") = {
> > +       .type = BPF_MAP_TYPE_SK_STORAGE,
> > +       .map_flags = BPF_F_NO_PREALLOC,
> > +};
> > +
> > +SEC("sockops")
> > +int _sockops(struct bpf_sock_ops *ctx)
> > +{
> > +       struct bpf_tcp_sock *tcp_sk;
> > +       struct bpf_sock *sk;
> > +       __u64 *next_dump;
> > +       __u64 now;
> > +
> > +       switch (ctx->op) {
> > +       case BPF_SOCK_OPS_TCP_CONNECT_CB:
> > +               bpf_sock_ops_cb_flags_set(ctx, BPF_SOCK_OPS_RTT_CB_FLAG);
> > +               return 1;
> > +       case BPF_SOCK_OPS_RTT_CB:
> > +               break;
> > +       default:
> > +               return 1;
> > +       }
> > +
> > +       sk = ctx->sk;
> > +       if (!sk)
> > +               return 1;
> > +
> > +       next_dump = bpf_sk_storage_get(&bpf_next_dump, sk, 0,
> > +                                      BPF_SK_STORAGE_GET_F_CREATE);
> > +       if (!next_dump)
> > +               return 1;
> > +
> > +       now = bpf_ktime_get_ns();
> > +       if (now < *next_dump)
> > +               return 1;
> > +
> > +       tcp_sk = bpf_tcp_sock(sk);
> > +       if (!tcp_sk)
> > +               return 1;
> > +
> > +       *next_dump = now + INTERVAL;
> > +
> > +       bpf_printk("dsack_dups=%u delivered=%u\n",
> > +                  tcp_sk->dsack_dups, tcp_sk->delivered);
> > +       bpf_printk("delivered_ce=%u icsk_retransmits=%u\n",
> > +                  tcp_sk->delivered_ce, tcp_sk->icsk_retransmits);
> > +
> > +       return 1;
> > +}
> > --
> > 2.22.0.410.gd8fdbe21b5-goog
> >
Y Song July 2, 2019, 12:39 a.m. UTC | #3
On Mon, Jul 1, 2019 at 5:31 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
>
> On 07/01, Y Song wrote:
> > On Mon, Jul 1, 2019 at 1:49 PM Stanislav Fomichev <sdf@google.com> wrote:
> > >
> > > Uses new RTT callback to dump stats every second.
> > >
> > > $ mkdir -p /tmp/cgroupv2
> > > $ mount -t cgroup2 none /tmp/cgroupv2
> > > $ mkdir -p /tmp/cgroupv2/foo
> > > $ echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
> > > $ bpftool prog load ./tcp_dumpstats_kern.o /sys/fs/bpf/tcp_prog
> > > $ bpftool cgroup attach /tmp/cgroupv2/foo sock_ops pinned /sys/fs/bpf/tcp_prog
> > > $ bpftool prog tracelog
> > > $ # run neper/netperf/etc
> > >
> > > Used neper to compare performance with and without this program attached
> > > and didn't see any noticeable performance impact.
> > >
> > > Sample output:
> > >   <idle>-0     [015] ..s.  2074.128800: 0: dsack_dups=0 delivered=242526
> > >   <idle>-0     [015] ..s.  2074.128808: 0: delivered_ce=0 icsk_retransmits=0
> > >   <idle>-0     [015] ..s.  2075.130133: 0: dsack_dups=0 delivered=323599
> > >   <idle>-0     [015] ..s.  2075.130138: 0: delivered_ce=0 icsk_retransmits=0
> > >   <idle>-0     [005] .Ns.  2076.131440: 0: dsack_dups=0 delivered=404648
> > >   <idle>-0     [005] .Ns.  2076.131447: 0: delivered_ce=0 icsk_retransmits=0
> > >
> > > Cc: Eric Dumazet <edumazet@google.com>
> > > Cc: Priyaranjan Jha <priyarjha@google.com>
> > > Cc: Yuchung Cheng <ycheng@google.com>
> > > Cc: Soheil Hassas Yeganeh <soheil@google.com>
> > > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > > ---
> > >  samples/bpf/Makefile             |  1 +
> > >  samples/bpf/tcp_dumpstats_kern.c | 65 ++++++++++++++++++++++++++++++++
> > >  2 files changed, 66 insertions(+)
> > >  create mode 100644 samples/bpf/tcp_dumpstats_kern.c
> >
> > Currently, only the bpf program goes into the repo. If we do not have another
> > script that uses this program for testing, the instructions in the commit
> > message should be added to the bpf program as comments, so people know what to
> > do with this file without going through the git commit message.
> >
> > Is it possible to create a script to run with this bpf program?
> There are general instructions in samples/bpf/tcp_bpf.readme
> with bpftool examples/etc. Should I just add a comment at the top
> of the BPF program to point people to that .readme file instead?

Referring to tcp_bpf.readme should work. Even simpler :-)

>
> > >
> > > diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
> > > index 0917f8cf4fab..eaebbeead42f 100644
> > > --- a/samples/bpf/Makefile
> > > +++ b/samples/bpf/Makefile
> > > @@ -154,6 +154,7 @@ always += tcp_iw_kern.o
> > >  always += tcp_clamp_kern.o
> > >  always += tcp_basertt_kern.o
> > >  always += tcp_tos_reflect_kern.o
> > > +always += tcp_dumpstats_kern.o
> > >  always += xdp_redirect_kern.o
> > >  always += xdp_redirect_map_kern.o
> > >  always += xdp_redirect_cpu_kern.o
> > > diff --git a/samples/bpf/tcp_dumpstats_kern.c b/samples/bpf/tcp_dumpstats_kern.c
> > > new file mode 100644
> > > index 000000000000..5d22bf61db65
> > > --- /dev/null
> > > +++ b/samples/bpf/tcp_dumpstats_kern.c
> > > @@ -0,0 +1,65 @@
> > > +// SPDX-License-Identifier: GPL-2.0
> > > +#include <linux/bpf.h>
> > > +
> > > +#include "bpf_helpers.h"
> > > +#include "bpf_endian.h"
> > > +
> > > +#define INTERVAL                       1000000000ULL
> > > +
> > > +int _version SEC("version") = 1;
> > > +char _license[] SEC("license") = "GPL";
[...]
diff mbox series

Patch

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 0917f8cf4fab..eaebbeead42f 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -154,6 +154,7 @@  always += tcp_iw_kern.o
 always += tcp_clamp_kern.o
 always += tcp_basertt_kern.o
 always += tcp_tos_reflect_kern.o
+always += tcp_dumpstats_kern.o
 always += xdp_redirect_kern.o
 always += xdp_redirect_map_kern.o
 always += xdp_redirect_cpu_kern.o
diff --git a/samples/bpf/tcp_dumpstats_kern.c b/samples/bpf/tcp_dumpstats_kern.c
new file mode 100644
index 000000000000..5d22bf61db65
--- /dev/null
+++ b/samples/bpf/tcp_dumpstats_kern.c
@@ -0,0 +1,65 @@ 
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define INTERVAL			1000000000ULL /* minimum gap between dumps; compared against bpf_ktime_get_ns() values, i.e. 1 second */
+
+int _version SEC("version") = 1; /* placed in the "version" ELF section */
+char _license[] SEC("license") = "GPL"; /* placed in the "license" ELF section */
+
+struct {
+	__u32 type;
+	__u32 map_flags;
+	int *key;
+	__u64 *value; /* value type: the __u64 timestamp that _sockops stores as the next dump time */
+} bpf_next_dump SEC(".maps") = { /* socket-storage map looked up with the socket in _sockops */
+	.type = BPF_MAP_TYPE_SK_STORAGE,
+	.map_flags = BPF_F_NO_PREALLOC,
+};
+
+SEC("sockops")
+int _sockops(struct bpf_sock_ops *ctx) /* prints four TCP counters on RTT callbacks, at most once per INTERVAL per socket; returns 1 on every path */
+{
+	struct bpf_tcp_sock *tcp_sk;
+	struct bpf_sock *sk;
+	__u64 *next_dump; /* points into this socket's bpf_next_dump storage */
+	__u64 now;
+
+	switch (ctx->op) {
+	case BPF_SOCK_OPS_TCP_CONNECT_CB:
+		bpf_sock_ops_cb_flags_set(ctx, BPF_SOCK_OPS_RTT_CB_FLAG); /* set the RTT callback flag when the connect op is seen */
+		return 1;
+	case BPF_SOCK_OPS_RTT_CB:
+		break; /* the only op that continues to the dump logic below */
+	default:
+		return 1;
+	}
+
+	sk = ctx->sk;
+	if (!sk)
+		return 1;
+
+	next_dump = bpf_sk_storage_get(&bpf_next_dump, sk, 0,
+				       BPF_SK_STORAGE_GET_F_CREATE); /* NOTE(review): F_CREATE presumably creates the entry when absent - confirm */
+	if (!next_dump)
+		return 1;
+
+	now = bpf_ktime_get_ns();
+	if (now < *next_dump)
+		return 1; /* stored next-dump time not reached yet: skip this callback */
+
+	tcp_sk = bpf_tcp_sock(sk);
+	if (!tcp_sk)
+		return 1; /* bail out before next_dump is updated */
+
+	*next_dump = now + INTERVAL; /* earliest time the next dump may happen */
+
+	bpf_printk("dsack_dups=%u delivered=%u\n",
+		   tcp_sk->dsack_dups, tcp_sk->delivered);
+	bpf_printk("delivered_ce=%u icsk_retransmits=%u\n",
+		   tcp_sk->delivered_ce, tcp_sk->icsk_retransmits); /* second trace line carries the remaining two counters */
+
+	return 1;
+}