diff mbox series

[bpf,2/2] selftests/bpf: add variable-length data concatenation pattern test

Message ID 20200616050432.1902042-2-andriin@fb.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series [bpf,1/2] bpf: bpf_probe_read_kernel_str() has to return amount of data read on success | expand

Commit Message

Andrii Nakryiko June 16, 2020, 5:04 a.m. UTC
Add selftest that validates variable-length data reading and concatentation
with one big shared data array. This is a common pattern in production use for
monitoring and tracing applications, that potentially can read a lot of data,
but usually reads much less. Such pattern allows to determine precisely what
amount of data needs to be sent over perfbuf/ringbuf and maximize efficiency.

This is the first BPF selftest that at all looks at and tests
bpf_probe_read_str()-like helper's return value, closing a major gap in BPF
testing. It surfaced the problem with bpf_probe_read_kernel_str() returning
0 on success, instead of amount of bytes successfully read.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
 .../testing/selftests/bpf/prog_tests/varlen.c | 56 +++++++++++
 .../testing/selftests/bpf/progs/test_varlen.c | 96 +++++++++++++++++++
 2 files changed, 152 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/varlen.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_varlen.c

Comments

Daniel Borkmann June 16, 2020, 8:21 p.m. UTC | #1
On 6/16/20 7:04 AM, Andrii Nakryiko wrote:
> Add selftest that validates variable-length data reading and concatentation
> with one big shared data array. This is a common pattern in production use for
> monitoring and tracing applications, that potentially can read a lot of data,
> but usually reads much less. Such pattern allows to determine precisely what
> amount of data needs to be sent over perfbuf/ringbuf and maximize efficiency.
> 
> This is the first BPF selftest that at all looks at and tests
> bpf_probe_read_str()-like helper's return value, closing a major gap in BPF
> testing. It surfaced the problem with bpf_probe_read_kernel_str() returning
> 0 on success, instead of amount of bytes successfully read.
> 
> Signed-off-by: Andrii Nakryiko <andriin@fb.com>

Fix looks good, but I'm seeing an issue in the selftest on my side. With latest
Clang/LLVM I'm getting:

# ./test_progs -t varlen
#86 varlen:OK
Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED

All good, however, the test_progs-no_alu32 fails for me with:

# ./test_progs-no_alu32 -t varlen
Switching to flavor 'no_alu32' subdirectory...
libbpf: load bpf program failed: Invalid argument
libbpf: -- BEGIN DUMP LOG ---
libbpf:
arg#0 type is not a struct
Unrecognized arg#0 type PTR
; int pid = bpf_get_current_pid_tgid() >> 32;
0: (85) call bpf_get_current_pid_tgid#14
; int pid = bpf_get_current_pid_tgid() >> 32;
1: (77) r0 >>= 32
; if (test_pid != pid || !capture)
2: (18) r1 = 0xffffb14a4010c200
4: (61) r1 = *(u32 *)(r1 +0)
  R0_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1_w=map_value(id=0,off=512,ks=4,vs=1056,imm=0) R10=fp0
; if (test_pid != pid || !capture)
5: (5d) if r1 != r0 goto pc+43
  R0_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R10=fp0
6: (18) r1 = 0xffffb14a4010c204
8: (71) r1 = *(u8 *)(r1 +0)
  R0_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1_w=map_value(id=0,off=516,ks=4,vs=1056,imm=0) R10=fp0
9: (15) if r1 == 0x0 goto pc+39
  R0=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R10=fp0
; len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
10: (18) r6 = 0xffffb14a4010c220
12: (18) r1 = 0xffffb14a4010c220
14: (b7) r2 = 256
15: (18) r3 = 0xffffb14a4010c000
17: (85) call bpf_probe_read_kernel_str#115
  R0=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1_w=map_value(id=0,off=544,ks=4,vs=1056,imm=0) R2_w=inv256 R3_w=map_value(id=0,off=0,ks=4,vs=1056,imm=0) R6_w=map_value(id=0,off=544,ks=4,vs=1056,imm=0) R10=fp0
last_idx 17 first_idx 9
regs=4 stack=0 before 15: (18) r3 = 0xffffb14a4010c000
regs=4 stack=0 before 14: (b7) r2 = 256
18: (67) r0 <<= 32
19: (bf) r1 = r0
20: (77) r1 >>= 32
; if (len <= MAX_LEN) {
21: (25) if r1 > 0x100 goto pc+7
  R0=inv(id=0,smax_value=1099511627776,umax_value=18446744069414584320,var_off=(0x0; 0xffffffff00000000),s32_min_value=0,s32_max_value=0,u32_max_value=0) R1=inv(id=0,umax_value=256,var_off=(0x0; 0x1ff)) R6=map_value(id=0,off=544,ks=4,vs=1056,imm=0) R10=fp0
;
22: (c7) r0 s>>= 32
; payload1_len1 = len;
23: (18) r1 = 0xffffb14a4010c208
25: (7b) *(u64 *)(r1 +0) = r0
  R0_w=inv(id=0,smin_value=-2147483648,smax_value=256) R1_w=map_value(id=0,off=520,ks=4,vs=1056,imm=0) R6=map_value(id=0,off=544,ks=4,vs=1056,imm=0) R10=fp0
; payload += len;
26: (18) r6 = 0xffffb14a4010c220
28: (0f) r6 += r0
last_idx 28 first_idx 21
regs=1 stack=0 before 26: (18) r6 = 0xffffb14a4010c220
regs=1 stack=0 before 25: (7b) *(u64 *)(r1 +0) = r0
regs=1 stack=0 before 23: (18) r1 = 0xffffb14a4010c208
regs=1 stack=0 before 22: (c7) r0 s>>= 32
regs=1 stack=0 before 21: (25) if r1 > 0x100 goto pc+7
  R0_rw=invP(id=0,smax_value=1099511627776,umax_value=18446744069414584320,var_off=(0x0; 0xffffffff00000000),s32_min_value=0,s32_max_value=0,u32_max_value=0) R1_rw=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6_w=map_value(id=0,off=544,ks=4,vs=1056,imm=0) R10=fp0
parent didn't have regs=1 stack=0 marks
last_idx 20 first_idx 9
regs=1 stack=0 before 20: (77) r1 >>= 32
regs=1 stack=0 before 19: (bf) r1 = r0
regs=1 stack=0 before 18: (67) r0 <<= 32
regs=1 stack=0 before 17: (85) call bpf_probe_read_kernel_str#115
value -2147483648 makes map_value pointer be out of bounds
processed 22 insns (limit 1000000) max_states_per_insn 0 total_states 2 peak_states 2 mark_read 1

libbpf: -- END LOG --
libbpf: failed to load program 'raw_tp/sys_enter'
libbpf: failed to load object 'test_varlen'
libbpf: failed to load BPF skeleton 'test_varlen': -4007
test_varlen:FAIL:skel_open failed to open skeleton
#86 varlen:FAIL
Summary: 0/0 PASSED, 0 SKIPPED, 1 FAILED
Andrii Nakryiko June 16, 2020, 9:27 p.m. UTC | #2
On Tue, Jun 16, 2020 at 1:21 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 6/16/20 7:04 AM, Andrii Nakryiko wrote:
> > Add selftest that validates variable-length data reading and concatentation
> > with one big shared data array. This is a common pattern in production use for
> > monitoring and tracing applications, that potentially can read a lot of data,
> > but usually reads much less. Such pattern allows to determine precisely what
> > amount of data needs to be sent over perfbuf/ringbuf and maximize efficiency.
> >
> > This is the first BPF selftest that at all looks at and tests
> > bpf_probe_read_str()-like helper's return value, closing a major gap in BPF
> > testing. It surfaced the problem with bpf_probe_read_kernel_str() returning
> > 0 on success, instead of amount of bytes successfully read.
> >
> > Signed-off-by: Andrii Nakryiko <andriin@fb.com>
>
> Fix looks good, but I'm seeing an issue in the selftest on my side. With latest
> Clang/LLVM I'm getting:
>
> # ./test_progs -t varlen
> #86 varlen:OK
> Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED
>
> All good, however, the test_progs-no_alu32 fails for me with:

Yeah, same here. It's due to Clang emitting unnecessary bit shifts
because bpf_probe_read_kernel_str() is defined as returning 32-bit
int. I have a patch ready locally, just waiting for bpf-next to open,
which switches those helpers to return long, which auto-matically
fixes this test.

If it's not a problem, I'd just wait for that patch to go into
bpf-next. If not, I can sprinkle bits of assembly magic around to
force the kernel to do those bitshifts earlier. But I figured having
test_progs-no_alu32 failing one selftest temporarily wasn't too bad.

>
> # ./test_progs-no_alu32 -t varlen
> Switching to flavor 'no_alu32' subdirectory...
> libbpf: load bpf program failed: Invalid argument
> libbpf: -- BEGIN DUMP LOG ---
> libbpf:
> arg#0 type is not a struct
> Unrecognized arg#0 type PTR
> ; int pid = bpf_get_current_pid_tgid() >> 32;
> 0: (85) call bpf_get_current_pid_tgid#14
> ; int pid = bpf_get_current_pid_tgid() >> 32;
> 1: (77) r0 >>= 32
> ; if (test_pid != pid || !capture)
> 2: (18) r1 = 0xffffb14a4010c200
> 4: (61) r1 = *(u32 *)(r1 +0)
>   R0_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1_w=map_value(id=0,off=512,ks=4,vs=1056,imm=0) R10=fp0
> ; if (test_pid != pid || !capture)
> 5: (5d) if r1 != r0 goto pc+43
>   R0_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R10=fp0
> 6: (18) r1 = 0xffffb14a4010c204
> 8: (71) r1 = *(u8 *)(r1 +0)
>   R0_w=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1_w=map_value(id=0,off=516,ks=4,vs=1056,imm=0) R10=fp0
> 9: (15) if r1 == 0x0 goto pc+39
>   R0=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R10=fp0
> ; len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
> 10: (18) r6 = 0xffffb14a4010c220
> 12: (18) r1 = 0xffffb14a4010c220
> 14: (b7) r2 = 256
> 15: (18) r3 = 0xffffb14a4010c000
> 17: (85) call bpf_probe_read_kernel_str#115
>   R0=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1_w=map_value(id=0,off=544,ks=4,vs=1056,imm=0) R2_w=inv256 R3_w=map_value(id=0,off=0,ks=4,vs=1056,imm=0) R6_w=map_value(id=0,off=544,ks=4,vs=1056,imm=0) R10=fp0
> last_idx 17 first_idx 9
> regs=4 stack=0 before 15: (18) r3 = 0xffffb14a4010c000
> regs=4 stack=0 before 14: (b7) r2 = 256
> 18: (67) r0 <<= 32
> 19: (bf) r1 = r0
> 20: (77) r1 >>= 32
> ; if (len <= MAX_LEN) {
> 21: (25) if r1 > 0x100 goto pc+7
>   R0=inv(id=0,smax_value=1099511627776,umax_value=18446744069414584320,var_off=(0x0; 0xffffffff00000000),s32_min_value=0,s32_max_value=0,u32_max_value=0) R1=inv(id=0,umax_value=256,var_off=(0x0; 0x1ff)) R6=map_value(id=0,off=544,ks=4,vs=1056,imm=0) R10=fp0
> ;
> 22: (c7) r0 s>>= 32
> ; payload1_len1 = len;
> 23: (18) r1 = 0xffffb14a4010c208
> 25: (7b) *(u64 *)(r1 +0) = r0
>   R0_w=inv(id=0,smin_value=-2147483648,smax_value=256) R1_w=map_value(id=0,off=520,ks=4,vs=1056,imm=0) R6=map_value(id=0,off=544,ks=4,vs=1056,imm=0) R10=fp0
> ; payload += len;
> 26: (18) r6 = 0xffffb14a4010c220
> 28: (0f) r6 += r0
> last_idx 28 first_idx 21
> regs=1 stack=0 before 26: (18) r6 = 0xffffb14a4010c220
> regs=1 stack=0 before 25: (7b) *(u64 *)(r1 +0) = r0
> regs=1 stack=0 before 23: (18) r1 = 0xffffb14a4010c208
> regs=1 stack=0 before 22: (c7) r0 s>>= 32
> regs=1 stack=0 before 21: (25) if r1 > 0x100 goto pc+7
>   R0_rw=invP(id=0,smax_value=1099511627776,umax_value=18446744069414584320,var_off=(0x0; 0xffffffff00000000),s32_min_value=0,s32_max_value=0,u32_max_value=0) R1_rw=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6_w=map_value(id=0,off=544,ks=4,vs=1056,imm=0) R10=fp0
> parent didn't have regs=1 stack=0 marks
> last_idx 20 first_idx 9
> regs=1 stack=0 before 20: (77) r1 >>= 32
> regs=1 stack=0 before 19: (bf) r1 = r0
> regs=1 stack=0 before 18: (67) r0 <<= 32
> regs=1 stack=0 before 17: (85) call bpf_probe_read_kernel_str#115
> value -2147483648 makes map_value pointer be out of bounds
> processed 22 insns (limit 1000000) max_states_per_insn 0 total_states 2 peak_states 2 mark_read 1
>
> libbpf: -- END LOG --
> libbpf: failed to load program 'raw_tp/sys_enter'
> libbpf: failed to load object 'test_varlen'
> libbpf: failed to load BPF skeleton 'test_varlen': -4007
> test_varlen:FAIL:skel_open failed to open skeleton
> #86 varlen:FAIL
> Summary: 0/0 PASSED, 0 SKIPPED, 1 FAILED
Daniel Borkmann June 16, 2020, 10:23 p.m. UTC | #3
On 6/16/20 11:27 PM, Andrii Nakryiko wrote:
> On Tue, Jun 16, 2020 at 1:21 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
>> On 6/16/20 7:04 AM, Andrii Nakryiko wrote:
>>> Add selftest that validates variable-length data reading and concatentation
>>> with one big shared data array. This is a common pattern in production use for
>>> monitoring and tracing applications, that potentially can read a lot of data,
>>> but usually reads much less. Such pattern allows to determine precisely what
>>> amount of data needs to be sent over perfbuf/ringbuf and maximize efficiency.
>>>
>>> This is the first BPF selftest that at all looks at and tests
>>> bpf_probe_read_str()-like helper's return value, closing a major gap in BPF
>>> testing. It surfaced the problem with bpf_probe_read_kernel_str() returning
>>> 0 on success, instead of amount of bytes successfully read.
>>>
>>> Signed-off-by: Andrii Nakryiko <andriin@fb.com>
>>
>> Fix looks good, but I'm seeing an issue in the selftest on my side. With latest
>> Clang/LLVM I'm getting:
>>
>> # ./test_progs -t varlen
>> #86 varlen:OK
>> Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED
>>
>> All good, however, the test_progs-no_alu32 fails for me with:
> 
> Yeah, same here. It's due to Clang emitting unnecessary bit shifts
> because bpf_probe_read_kernel_str() is defined as returning 32-bit
> int. I have a patch ready locally, just waiting for bpf-next to open,
> which switches those helpers to return long, which auto-matically
> fixes this test.
> 
> If it's not a problem, I'd just wait for that patch to go into
> bpf-next. If not, I can sprinkle bits of assembly magic around to
> force the kernel to do those bitshifts earlier. But I figured having
> test_progs-no_alu32 failing one selftest temporarily wasn't too bad.

Given {net,bpf}-next will open up soon, another option could be to take in the fix
itself to bpf and selftest would be submitted together with your other improvement;
any objections?

Thanks,
Daniel
Andrii Nakryiko June 16, 2020, 11:14 p.m. UTC | #4
On Tue, Jun 16, 2020 at 3:23 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 6/16/20 11:27 PM, Andrii Nakryiko wrote:
> > On Tue, Jun 16, 2020 at 1:21 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
> >> On 6/16/20 7:04 AM, Andrii Nakryiko wrote:
> >>> Add selftest that validates variable-length data reading and concatentation
> >>> with one big shared data array. This is a common pattern in production use for
> >>> monitoring and tracing applications, that potentially can read a lot of data,
> >>> but usually reads much less. Such pattern allows to determine precisely what
> >>> amount of data needs to be sent over perfbuf/ringbuf and maximize efficiency.
> >>>
> >>> This is the first BPF selftest that at all looks at and tests
> >>> bpf_probe_read_str()-like helper's return value, closing a major gap in BPF
> >>> testing. It surfaced the problem with bpf_probe_read_kernel_str() returning
> >>> 0 on success, instead of amount of bytes successfully read.
> >>>
> >>> Signed-off-by: Andrii Nakryiko <andriin@fb.com>
> >>
> >> Fix looks good, but I'm seeing an issue in the selftest on my side. With latest
> >> Clang/LLVM I'm getting:
> >>
> >> # ./test_progs -t varlen
> >> #86 varlen:OK
> >> Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED
> >>
> >> All good, however, the test_progs-no_alu32 fails for me with:
> >
> > Yeah, same here. It's due to Clang emitting unnecessary bit shifts
> > because bpf_probe_read_kernel_str() is defined as returning 32-bit
> > int. I have a patch ready locally, just waiting for bpf-next to open,
> > which switches those helpers to return long, which auto-matically
> > fixes this test.
> >
> > If it's not a problem, I'd just wait for that patch to go into
> > bpf-next. If not, I can sprinkle bits of assembly magic around to
> > force the kernel to do those bitshifts earlier. But I figured having
> > test_progs-no_alu32 failing one selftest temporarily wasn't too bad.
>
> Given {net,bpf}-next will open up soon, another option could be to take in the fix
> itself to bpf and selftest would be submitted together with your other improvement;
> any objections?
>

Yeah, no objections.

> Thanks,
> Daniel
Daniel Borkmann June 17, 2020, 3:51 p.m. UTC | #5
On 6/17/20 1:14 AM, Andrii Nakryiko wrote:
> On Tue, Jun 16, 2020 at 3:23 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
>> On 6/16/20 11:27 PM, Andrii Nakryiko wrote:
>>> On Tue, Jun 16, 2020 at 1:21 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
>>>> On 6/16/20 7:04 AM, Andrii Nakryiko wrote:
>>>>> Add selftest that validates variable-length data reading and concatentation
>>>>> with one big shared data array. This is a common pattern in production use for
>>>>> monitoring and tracing applications, that potentially can read a lot of data,
>>>>> but usually reads much less. Such pattern allows to determine precisely what
>>>>> amount of data needs to be sent over perfbuf/ringbuf and maximize efficiency.
>>>>>
>>>>> This is the first BPF selftest that at all looks at and tests
>>>>> bpf_probe_read_str()-like helper's return value, closing a major gap in BPF
>>>>> testing. It surfaced the problem with bpf_probe_read_kernel_str() returning
>>>>> 0 on success, instead of amount of bytes successfully read.
>>>>>
>>>>> Signed-off-by: Andrii Nakryiko <andriin@fb.com>
>>>>
>>>> Fix looks good, but I'm seeing an issue in the selftest on my side. With latest
>>>> Clang/LLVM I'm getting:
>>>>
>>>> # ./test_progs -t varlen
>>>> #86 varlen:OK
>>>> Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED
>>>>
>>>> All good, however, the test_progs-no_alu32 fails for me with:
>>>
>>> Yeah, same here. It's due to Clang emitting unnecessary bit shifts
>>> because bpf_probe_read_kernel_str() is defined as returning 32-bit
>>> int. I have a patch ready locally, just waiting for bpf-next to open,
>>> which switches those helpers to return long, which auto-matically
>>> fixes this test.
>>>
>>> If it's not a problem, I'd just wait for that patch to go into
>>> bpf-next. If not, I can sprinkle bits of assembly magic around to
>>> force the kernel to do those bitshifts earlier. But I figured having
>>> test_progs-no_alu32 failing one selftest temporarily wasn't too bad.
>>
>> Given {net,bpf}-next will open up soon, another option could be to take in the fix
>> itself to bpf and selftest would be submitted together with your other improvement;
>> any objections?
> 
> Yeah, no objections.

Sounds good, done.
John Fastabend June 18, 2020, 7:09 p.m. UTC | #6
Andrii Nakryiko wrote:
> Add selftest that validates variable-length data reading and concatentation
> with one big shared data array. This is a common pattern in production use for
> monitoring and tracing applications, that potentially can read a lot of data,
> but usually reads much less. Such pattern allows to determine precisely what
> amount of data needs to be sent over perfbuf/ringbuf and maximize efficiency.
> 
> This is the first BPF selftest that at all looks at and tests
> bpf_probe_read_str()-like helper's return value, closing a major gap in BPF
> testing. It surfaced the problem with bpf_probe_read_kernel_str() returning
> 0 on success, instead of amount of bytes successfully read.
> 
> Signed-off-by: Andrii Nakryiko <andriin@fb.com>
> ---

[...]

> +/* .data */
> +int payload2_len1 = -1;
> +int payload2_len2 = -1;
> +int total2 = -1;
> +char payload2[MAX_LEN + MAX_LEN] = { 1 };
> +
> +SEC("raw_tp/sys_enter")
> +int handler64(void *regs)
> +{
> +	int pid = bpf_get_current_pid_tgid() >> 32;
> +	void *payload = payload1;
> +	u64 len;
> +
> +	/* ignore irrelevant invocations */
> +	if (test_pid != pid || !capture)
> +		return 0;
> +
> +	len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
> +	if (len <= MAX_LEN) {

Took me a bit grok this. You are relying on the fact that in errors,
such as a page fault, will encode to a large u64 value and so you
verifier is happy. But most of my programs actually want to distinguish
between legitimate errors on the probe vs buffer overrun cases.

Can we make these tests do explicit check for errors. For example,

  if (len < 0) goto abort;

But this also breaks your types here. This is what I was trying to
point out in the 1/2 patch thread. Wanted to make the point here as
well in case it wasn't clear. Not sure I did the best job explaining.

> +		payload += len;
> +		payload1_len1 = len;
> +	}
> +
> +	len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
> +	if (len <= MAX_LEN) {
> +		payload += len;
> +		payload1_len2 = len;
> +	}
> +
> +	total1 = payload - (void *)payload1;
> +
> +	return 0;
> +}
Andrii Nakryiko June 18, 2020, 9:59 p.m. UTC | #7
On Thu, Jun 18, 2020 at 12:09 PM John Fastabend
<john.fastabend@gmail.com> wrote:
>
> Andrii Nakryiko wrote:
> > Add selftest that validates variable-length data reading and concatentation
> > with one big shared data array. This is a common pattern in production use for
> > monitoring and tracing applications, that potentially can read a lot of data,
> > but usually reads much less. Such pattern allows to determine precisely what
> > amount of data needs to be sent over perfbuf/ringbuf and maximize efficiency.
> >
> > This is the first BPF selftest that at all looks at and tests
> > bpf_probe_read_str()-like helper's return value, closing a major gap in BPF
> > testing. It surfaced the problem with bpf_probe_read_kernel_str() returning
> > 0 on success, instead of amount of bytes successfully read.
> >
> > Signed-off-by: Andrii Nakryiko <andriin@fb.com>
> > ---
>
> [...]
>
> > +/* .data */
> > +int payload2_len1 = -1;
> > +int payload2_len2 = -1;
> > +int total2 = -1;
> > +char payload2[MAX_LEN + MAX_LEN] = { 1 };
> > +
> > +SEC("raw_tp/sys_enter")
> > +int handler64(void *regs)
> > +{
> > +     int pid = bpf_get_current_pid_tgid() >> 32;
> > +     void *payload = payload1;
> > +     u64 len;
> > +
> > +     /* ignore irrelevant invocations */
> > +     if (test_pid != pid || !capture)
> > +             return 0;
> > +
> > +     len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
> > +     if (len <= MAX_LEN) {
>
> Took me a bit grok this. You are relying on the fact that in errors,
> such as a page fault, will encode to a large u64 value and so you
> verifier is happy. But most of my programs actually want to distinguish
> between legitimate errors on the probe vs buffer overrun cases.

What buffer overrun? bpf_probe_read_str() family cannot return higher
value than MAX_LEN. If you want to detect truncated strings, then you
can attempt reading MAX_LEN + 1 and then check that the return result
is MAX_LEN exactly. But still, that would be something like:

u64 len;

len = bpf_probe_read_str(payload, MAX_LEN + 1, &buf);
if (len > MAX_LEN)
  return -1;
if (len == MAX_LEN) {
  /* truncated */
} else {
  /* full string */
}

>
> Can we make these tests do explicit check for errors. For example,
>
>   if (len < 0) goto abort;
>
> But this also breaks your types here. This is what I was trying to
> point out in the 1/2 patch thread. Wanted to make the point here as
> well in case it wasn't clear. Not sure I did the best job explaining.
>

I can write *a correct* C code in a lot of ways such that it will not
pass verifier verification, not sure what that will prove, though.

Have you tried using the pattern with two ifs with no-ALU32? Does it work?

Also you are cheating in your example (in patch #1 thread). You are
exiting on the first error and do not attempt to read any more data
after that. In practice, you want to get as much info as possible,
even if some of string reads fail (e.g., because argv might not be
paged in, but env is, or vice versa). So you'll end up doing this:

len = bpf_probe_read_str(...);
if (len >= 0 && len <= MAX_LEN) {
    payload += len;
}
...

... and of course it spectacularly fails in no-ALU32.

To be completely fair, this is a result of Clang optimization and
Yonghong is trying to deal with it as we speak. Switching int to long
for helpers doesn't help it either. But there are better code patterns
(unsigned len + single if check) that do work with both ALU32 and
no-ALU32.

And I just double-checked, this pattern keeps working for ALU32 with
both int and long types, so maybe there are unnecessary bit shifts,
but at least code is still verifiable.

So my point stands. int -> long helps in some cases and doesn't hurt
in others, so I argue that it's a good thing to do :)




> > +             payload += len;
> > +             payload1_len1 = len;
> > +     }
> > +
> > +     len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
> > +     if (len <= MAX_LEN) {
> > +             payload += len;
> > +             payload1_len2 = len;
> > +     }
> > +
> > +     total1 = payload - (void *)payload1;
> > +
> > +     return 0;
> > +}
John Fastabend June 18, 2020, 11:48 p.m. UTC | #8
Andrii Nakryiko wrote:
> On Thu, Jun 18, 2020 at 12:09 PM John Fastabend
> <john.fastabend@gmail.com> wrote:
> >
> > Andrii Nakryiko wrote:
> > > Add selftest that validates variable-length data reading and concatentation
> > > with one big shared data array. This is a common pattern in production use for
> > > monitoring and tracing applications, that potentially can read a lot of data,
> > > but usually reads much less. Such pattern allows to determine precisely what
> > > amount of data needs to be sent over perfbuf/ringbuf and maximize efficiency.
> > >
> > > This is the first BPF selftest that at all looks at and tests
> > > bpf_probe_read_str()-like helper's return value, closing a major gap in BPF
> > > testing. It surfaced the problem with bpf_probe_read_kernel_str() returning
> > > 0 on success, instead of amount of bytes successfully read.
> > >
> > > Signed-off-by: Andrii Nakryiko <andriin@fb.com>
> > > ---
> >
> > [...]
> >
> > > +/* .data */
> > > +int payload2_len1 = -1;
> > > +int payload2_len2 = -1;
> > > +int total2 = -1;
> > > +char payload2[MAX_LEN + MAX_LEN] = { 1 };
> > > +
> > > +SEC("raw_tp/sys_enter")
> > > +int handler64(void *regs)
> > > +{
> > > +     int pid = bpf_get_current_pid_tgid() >> 32;
> > > +     void *payload = payload1;
> > > +     u64 len;
> > > +
> > > +     /* ignore irrelevant invocations */
> > > +     if (test_pid != pid || !capture)
> > > +             return 0;
> > > +
> > > +     len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
> > > +     if (len <= MAX_LEN) {
> >
> > Took me a bit grok this. You are relying on the fact that in errors,
> > such as a page fault, will encode to a large u64 value and so you
> > verifier is happy. But most of my programs actually want to distinguish
> > between legitimate errors on the probe vs buffer overrun cases.
> 
> What buffer overrun? bpf_probe_read_str() family cannot return higher
> value than MAX_LEN. If you want to detect truncated strings, then you
> can attempt reading MAX_LEN + 1 and then check that the return result
> is MAX_LEN exactly. But still, that would be something like:
> u64 len;
> 
> len = bpf_probe_read_str(payload, MAX_LEN + 1, &buf);
> if (len > MAX_LEN)
>   return -1;
> if (len == MAX_LEN) {
>   /* truncated */
> } else {
>   /* full string */
> }

+1

> 
> >
> > Can we make these tests do explicit check for errors. For example,
> >
> >   if (len < 0) goto abort;
> >
> > But this also breaks your types here. This is what I was trying to
> > point out in the 1/2 patch thread. Wanted to make the point here as
> > well in case it wasn't clear. Not sure I did the best job explaining.
> >
> 
> I can write *a correct* C code in a lot of ways such that it will not
> pass verifier verification, not sure what that will prove, though.
> 
> Have you tried using the pattern with two ifs with no-ALU32? Does it work?

Ran our CI on both mcpu=v2 and mcpu=v3 and the pattern with multiple
ifs exists in those tests. They both passed so everything seems OK.
In the real progs though things are a bit more complicated I didn't
check the exact generate code. Some how I missed the case below.
I put a compiler barrier in a few spots so I think this is blocking
the optimization below causing no-alu32 failures. I'll remove the
barriers after I wrap a few things reviews.. my own bug fixes ;) and
see if I can trigger the case below.

> 
> Also you are cheating in your example (in patch #1 thread). You are
> exiting on the first error and do not attempt to read any more data
> after that. In practice, you want to get as much info as possible,
> even if some of string reads fail (e.g., because argv might not be
> paged in, but env is, or vice versa). So you'll end up doing this:

Sure.

> 
> len = bpf_probe_read_str(...);
> if (len >= 0 && len <= MAX_LEN) {
>     payload += len;
> }
> ...
> 
> ... and of course it spectacularly fails in no-ALU32.
> 
> To be completely fair, this is a result of Clang optimization and
> Yonghong is trying to deal with it as we speak. Switching int to long
> for helpers doesn't help it either. But there are better code patterns
> (unsigned len + single if check) that do work with both ALU32 and
> no-ALU32.

Great.

> 
> And I just double-checked, this pattern keeps working for ALU32 with
> both int and long types, so maybe there are unnecessary bit shifts,
> but at least code is still verifiable.
> 
> So my point stands. int -> long helps in some cases and doesn't hurt
> in others, so I argue that it's a good thing to do :)

Convinced me as well. I Acked the other patch thanks.
Andrii Nakryiko June 19, 2020, 12:09 a.m. UTC | #9
On Thu, Jun 18, 2020 at 4:48 PM John Fastabend <john.fastabend@gmail.com> wrote:
>
> Andrii Nakryiko wrote:
> > On Thu, Jun 18, 2020 at 12:09 PM John Fastabend
> > <john.fastabend@gmail.com> wrote:
> > >
> > > Andrii Nakryiko wrote:
> > > > Add selftest that validates variable-length data reading and concatentation
> > > > with one big shared data array. This is a common pattern in production use for
> > > > monitoring and tracing applications, that potentially can read a lot of data,
> > > > but usually reads much less. Such pattern allows to determine precisely what
> > > > amount of data needs to be sent over perfbuf/ringbuf and maximize efficiency.
> > > >
> > > > This is the first BPF selftest that at all looks at and tests
> > > > bpf_probe_read_str()-like helper's return value, closing a major gap in BPF
> > > > testing. It surfaced the problem with bpf_probe_read_kernel_str() returning
> > > > 0 on success, instead of amount of bytes successfully read.
> > > >
> > > > Signed-off-by: Andrii Nakryiko <andriin@fb.com>
> > > > ---
> > >
> > > [...]
> > >
> > > > +/* .data */
> > > > +int payload2_len1 = -1;
> > > > +int payload2_len2 = -1;
> > > > +int total2 = -1;
> > > > +char payload2[MAX_LEN + MAX_LEN] = { 1 };
> > > > +
> > > > +SEC("raw_tp/sys_enter")
> > > > +int handler64(void *regs)
> > > > +{
> > > > +     int pid = bpf_get_current_pid_tgid() >> 32;
> > > > +     void *payload = payload1;
> > > > +     u64 len;
> > > > +
> > > > +     /* ignore irrelevant invocations */
> > > > +     if (test_pid != pid || !capture)
> > > > +             return 0;
> > > > +
> > > > +     len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
> > > > +     if (len <= MAX_LEN) {
> > >
> > > Took me a bit grok this. You are relying on the fact that in errors,
> > > such as a page fault, will encode to a large u64 value and so you
> > > verifier is happy. But most of my programs actually want to distinguish
> > > between legitimate errors on the probe vs buffer overrun cases.
> >
> > What buffer overrun? bpf_probe_read_str() family cannot return higher
> > value than MAX_LEN. If you want to detect truncated strings, then you
> > can attempt reading MAX_LEN + 1 and then check that the return result
> > is MAX_LEN exactly. But still, that would be something like:
> > u64 len;
> >
> > len = bpf_probe_read_str(payload, MAX_LEN + 1, &buf);
> > if (len > MAX_LEN)
> >   return -1;
> > if (len == MAX_LEN) {
> >   /* truncated */
> > } else {
> >   /* full string */
> > }
>
> +1
>
> >
> > >
> > > Can we make these tests do explicit check for errors. For example,
> > >
> > >   if (len < 0) goto abort;
> > >
> > > But this also breaks your types here. This is what I was trying to
> > > point out in the 1/2 patch thread. Wanted to make the point here as
> > > well in case it wasn't clear. Not sure I did the best job explaining.
> > >
> >
> > I can write *a correct* C code in a lot of ways such that it will not
> > pass verifier verification, not sure what that will prove, though.
> >
> > Have you tried using the pattern with two ifs with no-ALU32? Does it work?
>
> Ran our CI on both mcpu=v2 and mcpu=v3 and the pattern with multiple
> ifs exists in those tests. They both passed so everything seems OK.
> In the real progs though things are a bit more complicated I didn't
> check the exact generate code. Some how I missed the case below.
> I put a compiler barrier in a few spots so I think this is blocking
> the optimization below causing no-alu32 failures. I'll remove the
> barriers after I wrap a few things reviews.. my own bug fixes ;) and
> see if I can trigger the case below.
>
> >
> > Also you are cheating in your example (in patch #1 thread). You are
> > exiting on the first error and do not attempt to read any more data
> > after that. In practice, you want to get as much info as possible,
> > even if some of string reads fail (e.g., because argv might not be
> > paged in, but env is, or vice versa). So you'll end up doing this:
>
> Sure.
>
> >
> > len = bpf_probe_read_str(...);
> > if (len >= 0 && len <= MAX_LEN) {
> >     payload += len;
> > }
> > ...
> >
> > ... and of course it spectacularly fails in no-ALU32.
> >
> > To be completely fair, this is a result of Clang optimization and
> > Yonghong is trying to deal with it as we speak. Switching int to long
> > for helpers doesn't help it either. But there are better code patterns
> > (unsigned len + single if check) that do work with both ALU32 and
> > no-ALU32.
>
> Great.
>
> >
> > And I just double-checked, this pattern keeps working for ALU32 with
> > both int and long types, so maybe there are unnecessary bit shifts,
> > but at least code is still verifiable.
> >
> > So my point stands. int -> long helps in some cases and doesn't hurt
> > in others, so I argue that it's a good thing to do :)
>
> Convinced me as well. I Acked the other patch thanks.

Awesome :) Thanks for extra testing and validation on your side!
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/prog_tests/varlen.c b/tools/testing/selftests/bpf/prog_tests/varlen.c
new file mode 100644
index 000000000000..e498e2b90da1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/varlen.c
@@ -0,0 +1,56 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <time.h>
+#include "test_varlen.skel.h"
+
+#define CHECK_VAL(got, exp) \
+	CHECK((got) != (exp), "check", "got %ld != exp %ld\n", \
+	      (long)(got), (long)(exp))
+
+void test_varlen(void)
+{
+	int duration = 0, err;
+	struct test_varlen* skel;
+	struct test_varlen__bss *bss;
+	struct test_varlen__data *data;
+	const char str1[] = "Hello, ";
+	const char str2[] = "World!";
+	const char exp_str[] = "Hello, \0World!\0";
+	const int size1 = sizeof(str1);
+	const int size2 = sizeof(str2);
+
+	skel = test_varlen__open_and_load();
+	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+		return;
+	bss = skel->bss;
+	data = skel->data;
+
+	err = test_varlen__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	bss->test_pid = getpid();
+
+	/* trigger everything */
+	memcpy(bss->buf_in1, str1, size1);
+	memcpy(bss->buf_in2, str2, size2);
+	bss->capture = true;
+	usleep(1);
+	bss->capture = false;
+
+	CHECK_VAL(bss->payload1_len1, size1);
+	CHECK_VAL(bss->payload1_len2, size2);
+	CHECK_VAL(bss->total1, size1 + size2);
+	CHECK(memcmp(bss->payload1, exp_str, size1 + size2), "content_check",
+	      "doesn't match!");
+			
+	CHECK_VAL(data->payload2_len1, size1);
+	CHECK_VAL(data->payload2_len2, size2);
+	CHECK_VAL(data->total2, size1 + size2);
+	CHECK(memcmp(data->payload2, exp_str, size1 + size2), "content_check",
+	      "doesn't match!");
+cleanup:
+	test_varlen__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c
new file mode 100644
index 000000000000..09691852debf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_varlen.c
@@ -0,0 +1,96 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#define MAX_LEN 256
+
+char buf_in1[MAX_LEN] = {};
+char buf_in2[MAX_LEN] = {};
+
+int test_pid = 0;
+bool capture = false;
+
+/* .bss */
+long payload1_len1 = 0;
+long payload1_len2 = 0;
+long total1 = 0;
+char payload1[MAX_LEN + MAX_LEN] = {};
+
+/* .data */
+int payload2_len1 = -1;
+int payload2_len2 = -1;
+int total2 = -1;
+char payload2[MAX_LEN + MAX_LEN] = { 1 };
+
+SEC("raw_tp/sys_enter")
+int handler64(void *regs)
+{
+	int pid = bpf_get_current_pid_tgid() >> 32;
+	void *payload = payload1;
+	u64 len;
+
+	/* ignore irrelevant invocations */
+	if (test_pid != pid || !capture)
+		return 0;
+
+	len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+	if (len <= MAX_LEN) {
+		payload += len;
+		payload1_len1 = len;
+	}
+
+	len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+	if (len <= MAX_LEN) {
+		payload += len;
+		payload1_len2 = len;
+	}
+
+	total1 = payload - (void *)payload1;
+
+	return 0;
+}
+
+SEC("tp_btf/sys_enter")
+int handler32(void *regs)
+{
+	int pid = bpf_get_current_pid_tgid() >> 32;
+	void *payload = payload2;
+	u32 len;
+
+	/* ignore irrelevant invocations */
+	if (test_pid != pid || !capture)
+		return 0;
+
+	len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+	if (len <= MAX_LEN) {
+		payload += len;
+		payload2_len1 = len;
+	}
+
+	len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+	if (len <= MAX_LEN) {
+		payload += len;
+		payload2_len2 = len;
+	}
+
+	total2 = payload - (void *)payload2;
+
+	return 0;
+}
+
+SEC("tp_btf/sys_exit")
+int handler_exit(void *regs)
+{
+	long bla;
+
+	if (bpf_probe_read_kernel(&bla, sizeof(bla), 0))
+		return 1;
+	else
+		return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";