[bpf-next,v2,2/2] samples/bpf: Add xdp_sample_pkts example

Message ID 152813003614.3465.11049830599815911223.stgit@alrua-kau
State Changes Requested, archived
Delegated to: BPF Maintainers
Series [bpf-next,v2,1/2] trace_helpers.c: Add helpers to poll multiple perf FDs for events

Commit Message

Toke Høiland-Jørgensen June 4, 2018, 4:33 p.m. UTC
This adds an example program showing how to sample packets from XDP using
the perf event buffer. The example userspace program just prints the
Ethernet header of every sampled packet.

The example sets up a perf file descriptor per CPU, allowing the XDP
program to pass BPF_F_CURRENT_CPU and work no matter which CPU handles the
packet.

Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
---
 samples/bpf/Makefile               |    4 +
 samples/bpf/xdp_sample_pkts_kern.c |   62 ++++++++++++++
 samples/bpf/xdp_sample_pkts_user.c |  162 ++++++++++++++++++++++++++++++++++++
 3 files changed, 228 insertions(+)
 create mode 100644 samples/bpf/xdp_sample_pkts_kern.c
 create mode 100644 samples/bpf/xdp_sample_pkts_user.c

Comments

Jakub Kicinski June 4, 2018, 10:32 p.m. UTC | #1
On Mon, 04 Jun 2018 18:33:56 +0200, Toke Høiland-Jørgensen wrote:
> +	if (load_bpf_file(filename)) {

Would you mind using libbpf instead of bpf_load.o?  I converted some
samples in be5bca44aa6b ("samples: bpf: convert some XDP samples from
bpf_load to libbpf"), it's pretty straightforward.  Maybe we can kill
bpf_load.o one day :)
Daniel Borkmann June 4, 2018, 11:43 p.m. UTC | #2
On 06/05/2018 12:32 AM, Jakub Kicinski wrote:
> On Mon, 04 Jun 2018 18:33:56 +0200, Toke Høiland-Jørgensen wrote:
>> +	if (load_bpf_file(filename)) {
> 
> Would you mind using libbpf instead of bpf_load.o?  I converted some
> samples in be5bca44aa6b ("samples: bpf: convert some XDP samples from
> bpf_load to libbpf"), it's pretty straightforward.  Maybe we can kill
> bpf_load.o one day :)

Agreed, we should only be using libbpf going forward.
Toke Høiland-Jørgensen June 5, 2018, 9:20 a.m. UTC | #3
Daniel Borkmann <daniel@iogearbox.net> writes:

> On 06/05/2018 12:32 AM, Jakub Kicinski wrote:
>> On Mon, 04 Jun 2018 18:33:56 +0200, Toke Høiland-Jørgensen wrote:
>>> +	if (load_bpf_file(filename)) {
>> 
>> Would you mind using libbpf instead of bpf_load.o?  I converted some
>> samples in be5bca44aa6b ("samples: bpf: convert some XDP samples from
>> bpf_load to libbpf"), it's pretty straightforward.  Maybe we can kill
>> bpf_load.o one day :)
>
> Agreed, we should only be using libbpf going forward.

Right, I'll rework to load using libbpf instead.
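
As a rough, non-authoritative sketch of what such a libbpf-based load
could look like (following the pattern of the samples converted in
be5bca44aa6b), the load_bpf_file() call and the global prog_fd[]/map_fd[]
arrays from bpf_load.o could be replaced along these lines. The wrapper
name load_with_libbpf() and its error handling are illustrative only,
and bpf_prog_load()/bpf_object__find_map_by_name() are assumed to be
the helpers available in tools/lib/bpf at the time:

/* Illustrative sketch only: load the XDP object with libbpf instead of
 * bpf_load.o and look up the perf event array declared as "my_map" in
 * xdp_sample_pkts_kern.c.
 */
#include <stdio.h>
#include <linux/bpf.h>
#include <libbpf.h>

static int load_with_libbpf(const char *filename, int *prog_fd, int *map_fd)
{
	struct bpf_object *obj;
	struct bpf_map *map;

	/* Opens and loads the ELF object as an XDP program and returns
	 * the fd of the first program in *prog_fd.
	 */
	if (bpf_prog_load(filename, BPF_PROG_TYPE_XDP, &obj, prog_fd)) {
		fprintf(stderr, "ERROR: failed to load %s\n", filename);
		return -1;
	}

	/* Replaces bpf_load.o's map_fd[0]. */
	map = bpf_object__find_map_by_name(obj, "my_map");
	if (!map) {
		fprintf(stderr, "ERROR: map 'my_map' not found\n");
		return -1;
	}
	*map_fd = bpf_map__fd(map);

	return 0;
}

The caller would then hand *prog_fd to do_attach() and use *map_fd in
place of map_fd[0] in test_bpf_perf_event().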
David Beckett June 6, 2018, 11:40 a.m. UTC | #4
On 04/06/18 17:33, Toke Høiland-Jørgensen wrote:
> +
> +#define SAMPLE_SIZE 64ul
> +
The program currently cannot sample minimum-sized packets, as the 4-byte
CRC checksum isn't present in ctx. Would it be better to use a 60ul sample
size so that these packets can be processed?
> +	if (data + SAMPLE_SIZE < data_end) {
> +		/* The XDP perf_event_output handler will use the upper 32 bits
> +		 * of the flags argument as a number of bytes to include of the
I may be wrong on this, but should this also be <= to allow packets of
exactly SAMPLE_SIZE bytes to be sampled?
Toke Høiland-Jørgensen June 6, 2018, 12:28 p.m. UTC | #5
David Beckett <david.beckett@netronome.com> writes:

> On 04/06/18 17:33, Toke Høiland-Jørgensen wrote:
>> +
>> +#define SAMPLE_SIZE 64ul
>> +
> The program currently cannot sample minimum-sized packets, as the 4-byte
> CRC checksum isn't present in ctx. Would it be better to use a 60ul sample
> size so that these packets can be processed?

Right. However, this also reminds me that I wanted to make the sampling
size dynamic, so it is possible to dump packets that are smaller than
the configured SAMPLE_SIZE. Will fix :)

>> +	if (data + SAMPLE_SIZE < data_end) {
>> +		/* The XDP perf_event_output handler will use the upper 32 bits
>> +		 * of the flags argument as a number of bytes to include of the
> I may be wrong on this, but should this also be <= to allow packets of
> exactly SAMPLE_SIZE bytes to be sampled?

Yes, you are right, but that goes away with the change I mentioned above.

-Toke
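
As a rough sketch of the dynamic sampling idea mentioned above (not the
actual follow-up version of the patch), the XDP program could clamp the
number of sampled bytes to the packet length and pass that value in the
upper 32 bits of the flags argument, so packets shorter than SAMPLE_SIZE
are sampled as well. This assumes the my_map, SAMPLE_SIZE and bpf_printk
definitions from xdp_sample_pkts_kern.c below:

SEC("xdp_sample")
int xdp_sample_prog(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;

	/* Metadata will be in the perf event before the packet data. */
	struct S {
		u16 cookie;
		u16 pkt_len;
	} __attribute__((packed)) metadata;
	u64 flags = BPF_F_CURRENT_CPU;
	u16 sample_size;
	int ret;

	if (data < data_end) {
		metadata.cookie = 0xdead;
		metadata.pkt_len = (u16)(data_end - data);

		/* bpf_perf_event_output() takes the number of packet bytes
		 * to append in the upper 32 bits of the flags argument, so
		 * shorter packets can simply pass their own length instead
		 * of the fixed SAMPLE_SIZE.
		 */
		sample_size = metadata.pkt_len;
		if (sample_size > SAMPLE_SIZE)
			sample_size = SAMPLE_SIZE;
		flags |= (u64)sample_size << 32;

		ret = bpf_perf_event_output(ctx, &my_map, flags,
					    &metadata, sizeof(metadata));
		if (ret)
			bpf_printk("perf_event_output failed: %d\n", ret);
	}

	return XDP_PASS;
}

The existing print_bpf_output() in the userspace program already bounds
its loop by e->pkt_len, so it copes with the shorter samples unchanged.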

Patch

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 1303af10e54d..6f0c6d276a86 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -52,6 +52,7 @@  hostprogs-y += xdp_adjust_tail
 hostprogs-y += xdpsock
 hostprogs-y += xdp_fwd
 hostprogs-y += task_fd_query
+hostprogs-y += xdp_sample_pkts
 
 # Libbpf dependencies
 LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -107,6 +108,7 @@  xdp_adjust_tail-objs := xdp_adjust_tail_user.o
 xdpsock-objs := bpf_load.o xdpsock_user.o
 xdp_fwd-objs := bpf_load.o xdp_fwd_user.o
 task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
+xdp_sample_pkts-objs := bpf_load.o xdp_sample_pkts_user.o $(TRACE_HELPERS)
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -163,6 +165,7 @@  always += xdp_adjust_tail_kern.o
 always += xdpsock_kern.o
 always += xdp_fwd_kern.o
 always += task_fd_query_kern.o
+always += xdp_sample_pkts_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -179,6 +182,7 @@  HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/
 HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/
 HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/
 HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/
+HOSTCFLAGS_xdp_sample_pkts_user.o += -I$(srctree)/tools/lib/bpf/
 
 HOST_LOADLIBES		+= $(LIBBPF) -lelf
 HOSTLOADLIBES_tracex4		+= -lrt
diff --git a/samples/bpf/xdp_sample_pkts_kern.c b/samples/bpf/xdp_sample_pkts_kern.c
new file mode 100644
index 000000000000..4560522ca015
--- /dev/null
+++ b/samples/bpf/xdp_sample_pkts_kern.c
@@ -0,0 +1,62 @@ 
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+#define SAMPLE_SIZE 64ul
+#define MAX_CPUS 24
+
+#define bpf_printk(fmt, ...)					\
+({								\
+	       char ____fmt[] = fmt;				\
+	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
+				##__VA_ARGS__);			\
+})
+
+struct bpf_map_def SEC("maps") my_map = {
+	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(u32),
+	.max_entries = MAX_CPUS,
+};
+
+SEC("xdp_sample")
+int xdp_sample_prog(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+
+	/* Metadata will be in the perf event before the packet data. */
+	struct S {
+		u16 cookie;
+		u16 pkt_len;
+	} __attribute__((packed)) metadata;
+
+	if (data + SAMPLE_SIZE < data_end) {
+		/* The XDP perf_event_output handler will use the upper 32 bits
+		 * of the flags argument as a number of bytes to include of the
+		 * packet payload in the event data. If the size is too big, the
+		 * call to bpf_perf_event_output will fail and return -EFAULT.
+		 *
+		 * See bpf_xdp_event_output in net/core/filter.c.
+		 *
+		 * The BPF_F_CURRENT_CPU flag means that the event output fd
+		 * will be indexed by the CPU number in the event map.
+		 */
+		u64 flags = (SAMPLE_SIZE << 32) | BPF_F_CURRENT_CPU;
+		int ret;
+
+		metadata.cookie = 0xdead;
+		metadata.pkt_len = (u16)(data_end - data);
+
+		ret = bpf_perf_event_output(ctx, &my_map, flags,
+				      &metadata, sizeof(metadata));
+		if (ret)
+			bpf_printk("perf_event_output failed: %d\n", ret);
+	}
+
+	return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c
new file mode 100644
index 000000000000..35c5dd953f48
--- /dev/null
+++ b/samples/bpf/xdp_sample_pkts_user.c
@@ -0,0 +1,162 @@ 
+/* This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <linux/perf_event.h>
+#include <linux/bpf.h>
+#include <net/if.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/sysinfo.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <signal.h>
+#include <libbpf.h>
+#include "bpf_load.h"
+#include "bpf_util.h"
+#include <bpf/bpf.h>
+
+#include "perf-sys.h"
+#include "trace_helpers.h"
+
+#define MAX_CPUS 24
+static int pmu_fds[MAX_CPUS], if_idx;
+static struct perf_event_mmap_page *headers[MAX_CPUS];
+static char *if_name;
+
+static int do_attach(int idx, int fd, const char *name)
+{
+	int err;
+
+	err = bpf_set_link_xdp_fd(idx, fd, 0);
+	if (err < 0)
+		printf("ERROR: failed to attach program to %s\n", name);
+
+	return err;
+}
+
+static int do_detach(int idx, const char *name)
+{
+	int err;
+
+	err = bpf_set_link_xdp_fd(idx, -1, 0);
+	if (err < 0)
+		printf("ERROR: failed to detach program from %s\n", name);
+
+	return err;
+}
+
+#define SAMPLE_SIZE 64
+
+static int print_bpf_output(void *data, int size)
+{
+	struct {
+		__u16 cookie;
+		__u16 pkt_len;
+		__u8  pkt_data[SAMPLE_SIZE];
+	} __attribute__((packed)) *e = data;
+	int i;
+
+	if (e->cookie != 0xdead) {
+		printf("BUG cookie %x sized %d\n",
+		       e->cookie, size);
+		return LIBBPF_PERF_EVENT_ERROR;
+	}
+
+	printf("Pkt len: %-5d bytes. Ethernet hdr: ", e->pkt_len);
+	for (i = 0; i < 14 && i < e->pkt_len; i++)
+		printf("%02x ", e->pkt_data[i]);
+	printf("\n");
+
+	return LIBBPF_PERF_EVENT_CONT;
+}
+
+static void test_bpf_perf_event(int num)
+{
+	struct perf_event_attr attr = {
+		.sample_type = PERF_SAMPLE_RAW,
+		.type = PERF_TYPE_SOFTWARE,
+		.config = PERF_COUNT_SW_BPF_OUTPUT,
+		.wakeup_events = 1, /* get an fd notification for every event */
+	};
+	int i;
+
+	for (i = 0; i < num; i++) {
+		int key = i;
+
+		pmu_fds[i] = sys_perf_event_open(&attr, -1/*pid*/, i/*cpu*/, -1/*group_fd*/, 0);
+
+		assert(pmu_fds[i] >= 0);
+		assert(bpf_map_update_elem(map_fd[0], &key, &pmu_fds[i], BPF_ANY) == 0);
+		ioctl(pmu_fds[i], PERF_EVENT_IOC_ENABLE, 0);
+	}
+}
+
+static void sig_handler(int signo)
+{
+	do_detach(if_idx, if_name);
+	exit(0);
+}
+
+int main(int argc, char **argv)
+{
+	char filename[256];
+	int ret, err;
+	int numcpus;
+	int i;
+
+	if (argc < 2) {
+		printf("Usage: %s <ifname>\n", argv[0]);
+		return 1;
+	}
+
+	numcpus = get_nprocs();
+	if (numcpus > MAX_CPUS)
+		numcpus = MAX_CPUS;
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	if_idx = if_nametoindex(argv[1]);
+	if (!if_idx)
+		if_idx = strtoul(argv[1], NULL, 0);
+
+	if (!if_idx) {
+		fprintf(stderr, "Invalid ifname\n");
+		return 1;
+	}
+	if_name = argv[1];
+	err = do_attach(if_idx, prog_fd[0], argv[1]);
+	if (err)
+		return err;
+
+	if (signal(SIGINT, sig_handler) ||
+	    signal(SIGHUP, sig_handler) ||
+	    signal(SIGTERM, sig_handler)) {
+		perror("signal");
+		return 1;
+	}
+
+	test_bpf_perf_event(numcpus);
+
+	for (i = 0; i < numcpus; i++)
+		if (perf_event_mmap_header(pmu_fds[i], &headers[i]) < 0)
+			return 1;
+
+	ret = perf_event_poller_multi(pmu_fds, headers, numcpus, print_bpf_output);
+	kill(0, SIGINT);
+	return ret;
+}