Message ID | 20190815112044.38420-1-iii@linux.ibm.com |
---|---|
State | Changes Requested |
Delegated to: | BPF Maintainers |
Headers | show |
Series | [bpf] bpf: fix accessing bpf_sysctl.file_pos on s390 | expand |
Ilya Leoshkevich <iii@linux.ibm.com> [Thu, 2019-08-15 11:20 -0700]: > "ctx:file_pos sysctl:read write ok" fails on s390 with "Read value != > nux". This is because verifier rewrites a complete 32-bit > bpf_sysctl.file_pos update to a partial update of the first 32 bits of > 64-bit *bpf_sysctl_kern.ppos, which is not correct on big-endian > systems. > > Fix by using an offset on big-endian systems. > > Ditto for bpf_sysctl.file_pos reads. Currently the test does not detect > a problem there, since it expects to see 0, which it gets with high > probability in error cases, so change it to seek to offset 3 and expect > 3 in bpf_sysctl.file_pos. > > Fixes: e1550bfe0de4 ("bpf: Add file_pos field to bpf_sysctl ctx") > Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com> Right, I missed this. Thanks for fixing! Acked-by: Andrey Ignatov <rdna@fb.com> > --- > include/linux/filter.h | 10 ++++++++++ > kernel/bpf/cgroup.c | 9 +++++++-- > tools/testing/selftests/bpf/test_sysctl.c | 9 ++++++++- > 3 files changed, 25 insertions(+), 3 deletions(-) > > diff --git a/include/linux/filter.h b/include/linux/filter.h > index 92c6e31fb008..94e81c56d81c 100644 > --- a/include/linux/filter.h > +++ b/include/linux/filter.h > @@ -760,6 +760,16 @@ bpf_ctx_narrow_load_shift(u32 off, u32 size, u32 size_default) > #endif > } > > +static inline s16 > +bpf_ctx_narrow_access_offset(size_t variable_size, size_t access_size) > +{ > +#ifdef __LITTLE_ENDIAN > + return 0; > +#else > + return variable_size - access_size; > +#endif > +} > + > #define bpf_ctx_wide_access_ok(off, size, type, field) \ > (size == sizeof(__u64) && \ > off >= offsetof(type, field) && \ > diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c > index 0a00eaca6fae..b835fbb13ea8 100644 > --- a/kernel/bpf/cgroup.c > +++ b/kernel/bpf/cgroup.c > @@ -1356,7 +1356,9 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type, > treg, si->dst_reg, > offsetof(struct bpf_sysctl_kern, ppos)); > *insn++ = BPF_STX_MEM( > - 
BPF_SIZEOF(u32), treg, si->src_reg, 0); > + BPF_SIZEOF(u32), treg, si->src_reg, > + bpf_ctx_narrow_access_offset( > + sizeof(loff_t), sizeof(u32))); > *insn++ = BPF_LDX_MEM( > BPF_DW, treg, si->dst_reg, > offsetof(struct bpf_sysctl_kern, tmp_reg)); > @@ -1366,7 +1368,10 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type, > si->dst_reg, si->src_reg, > offsetof(struct bpf_sysctl_kern, ppos)); > *insn++ = BPF_LDX_MEM( > - BPF_SIZE(si->code), si->dst_reg, si->dst_reg, 0); > + BPF_SIZE(si->code), si->dst_reg, si->dst_reg, > + bpf_ctx_narrow_access_offset( > + sizeof(loff_t), > + bpf_size_to_bytes(BPF_SIZE(si->code)))); > } > *target_size = sizeof(u32); > break; > diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c > index a3bebd7c68dd..abc26248a7f1 100644 > --- a/tools/testing/selftests/bpf/test_sysctl.c > +++ b/tools/testing/selftests/bpf/test_sysctl.c > @@ -31,6 +31,7 @@ struct sysctl_test { > enum bpf_attach_type attach_type; > const char *sysctl; > int open_flags; > + int seek; > const char *newval; > const char *oldval; > enum { > @@ -139,7 +140,7 @@ static struct sysctl_test tests[] = { > /* If (file_pos == X) */ > BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, > offsetof(struct bpf_sysctl, file_pos)), > - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2), > + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 3, 2), > > /* return ALLOW; */ > BPF_MOV64_IMM(BPF_REG_0, 1), > @@ -152,6 +153,7 @@ static struct sysctl_test tests[] = { > .attach_type = BPF_CGROUP_SYSCTL, > .sysctl = "kernel/ostype", > .open_flags = O_RDONLY, > + .seek = 3, > .result = SUCCESS, > }, > { > @@ -1442,6 +1444,11 @@ static int access_sysctl(const char *sysctl_path, > if (fd < 0) > return fd; > > + if (test->seek && lseek(fd, test->seek, SEEK_SET) == -1) { > + log_err("lseek(%d) failed", test->seek); > + goto err; > + } > + > if (test->open_flags == O_RDONLY) { > char buf[128]; >
On 8/15/19 4:20 AM, Ilya Leoshkevich wrote: > "ctx:file_pos sysctl:read write ok" fails on s390 with "Read value != > nux". This is because verifier rewrites a complete 32-bit > bpf_sysctl.file_pos update to a partial update of the first 32 bits of > 64-bit *bpf_sysctl_kern.ppos, which is not correct on big-endian > systems. > > Fix by using an offset on big-endian systems. > > Ditto for bpf_sysctl.file_pos reads. Currently the test does not detect > a problem there, since it expects to see 0, which it gets with high > probability in error cases, so change it to seek to offset 3 and expect > 3 in bpf_sysctl.file_pos. > > Fixes: e1550bfe0de4 ("bpf: Add file_pos field to bpf_sysctl ctx") > Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com> > --- > include/linux/filter.h | 10 ++++++++++ > kernel/bpf/cgroup.c | 9 +++++++-- > tools/testing/selftests/bpf/test_sysctl.c | 9 ++++++++- > 3 files changed, 25 insertions(+), 3 deletions(-) > > diff --git a/include/linux/filter.h b/include/linux/filter.h > index 92c6e31fb008..94e81c56d81c 100644 > --- a/include/linux/filter.h > +++ b/include/linux/filter.h > @@ -760,6 +760,16 @@ bpf_ctx_narrow_load_shift(u32 off, u32 size, u32 size_default) > #endif > } > > +static inline s16 > +bpf_ctx_narrow_access_offset(size_t variable_size, size_t access_size) > +{ > +#ifdef __LITTLE_ENDIAN > + return 0; > +#else > + return variable_size - access_size; > +#endif > +} The change looks correct to me. But now in include/linux/filter.h we have two inline functions: static inline u8 bpf_ctx_narrow_load_shift(u32 off, u32 size, u32 size_default) { u8 load_off = off & (size_default - 1); #ifdef __LITTLE_ENDIAN return load_off * 8; #else return (size_default - (load_off + size)) * 8; #endif } static inline s16 bpf_ctx_narrow_access_offset(size_t variable_size, size_t access_size) { #ifdef __LITTLE_ENDIAN return 0; #else return variable_size - access_size; #endif } It would be good if we can have ifdef __LITTLE_ENDIAN only in one place. 
How about something like below: static inline u8 bpf_ctx_narrow_access_offset(u32 off, u32 size, u32 size_default) { u8 access_off = off & (size_default - 1); #ifdef __LITTLE_ENDIAN return access_off; #else return size_default - (access_off + size); #endif } static inline u8 bpf_ctx_narrow_load_shift(u32 off, u32 size, u32 size_default) { return bpf_ctx_narrow_access_offset(off, size, size_default) * 8; } > + > #define bpf_ctx_wide_access_ok(off, size, type, field) \ > (size == sizeof(__u64) && \ > off >= offsetof(type, field) && \ > diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c > index 0a00eaca6fae..b835fbb13ea8 100644 > --- a/kernel/bpf/cgroup.c > +++ b/kernel/bpf/cgroup.c > @@ -1356,7 +1356,9 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type, > treg, si->dst_reg, > offsetof(struct bpf_sysctl_kern, ppos)); > *insn++ = BPF_STX_MEM( > - BPF_SIZEOF(u32), treg, si->src_reg, 0); > + BPF_SIZEOF(u32), treg, si->src_reg, > + bpf_ctx_narrow_access_offset( > + sizeof(loff_t), sizeof(u32))); > *insn++ = BPF_LDX_MEM( > BPF_DW, treg, si->dst_reg, > offsetof(struct bpf_sysctl_kern, tmp_reg)); > @@ -1366,7 +1368,10 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type, > si->dst_reg, si->src_reg, > offsetof(struct bpf_sysctl_kern, ppos)); > *insn++ = BPF_LDX_MEM( > - BPF_SIZE(si->code), si->dst_reg, si->dst_reg, 0); > + BPF_SIZE(si->code), si->dst_reg, si->dst_reg, > + bpf_ctx_narrow_access_offset( > + sizeof(loff_t), > + bpf_size_to_bytes(BPF_SIZE(si->code)))); > } > *target_size = sizeof(u32); > break; > diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c > index a3bebd7c68dd..abc26248a7f1 100644 > --- a/tools/testing/selftests/bpf/test_sysctl.c > +++ b/tools/testing/selftests/bpf/test_sysctl.c > @@ -31,6 +31,7 @@ struct sysctl_test { > enum bpf_attach_type attach_type; > const char *sysctl; > int open_flags; > + int seek; > const char *newval; > const char *oldval; > enum { > @@ -139,7 +140,7 
@@ static struct sysctl_test tests[] = { > /* If (file_pos == X) */ > BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, > offsetof(struct bpf_sysctl, file_pos)), > - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2), > + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 3, 2), > > /* return ALLOW; */ > BPF_MOV64_IMM(BPF_REG_0, 1), > @@ -152,6 +153,7 @@ static struct sysctl_test tests[] = { > .attach_type = BPF_CGROUP_SYSCTL, > .sysctl = "kernel/ostype", > .open_flags = O_RDONLY, > + .seek = 3, > .result = SUCCESS, > }, > { > @@ -1442,6 +1444,11 @@ static int access_sysctl(const char *sysctl_path, > if (fd < 0) > return fd; > > + if (test->seek && lseek(fd, test->seek, SEEK_SET) == -1) { > + log_err("lseek(%d) failed", test->seek); > + goto err; > + } > + > if (test->open_flags == O_RDONLY) { > char buf[128]; > >
> Am 16.08.2019 um 01:01 schrieb Yonghong Song <yhs@fb.com>: > > > > On 8/15/19 4:20 AM, Ilya Leoshkevich wrote: >> "ctx:file_pos sysctl:read write ok" fails on s390 with "Read value != >> nux". This is because verifier rewrites a complete 32-bit >> bpf_sysctl.file_pos update to a partial update of the first 32 bits of >> 64-bit *bpf_sysctl_kern.ppos, which is not correct on big-endian >> systems. >> >> Fix by using an offset on big-endian systems. >> >> Ditto for bpf_sysctl.file_pos reads. Currently the test does not detect >> a problem there, since it expects to see 0, which it gets with high >> probability in error cases, so change it to seek to offset 3 and expect >> 3 in bpf_sysctl.file_pos. >> >> Fixes: e1550bfe0de4 ("bpf: Add file_pos field to bpf_sysctl ctx") >> Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com> >> --- >> include/linux/filter.h | 10 ++++++++++ >> kernel/bpf/cgroup.c | 9 +++++++-- >> tools/testing/selftests/bpf/test_sysctl.c | 9 ++++++++- >> 3 files changed, 25 insertions(+), 3 deletions(-) >> >> diff --git a/include/linux/filter.h b/include/linux/filter.h >> index 92c6e31fb008..94e81c56d81c 100644 >> --- a/include/linux/filter.h >> +++ b/include/linux/filter.h >> @@ -760,6 +760,16 @@ bpf_ctx_narrow_load_shift(u32 off, u32 size, u32 size_default) >> #endif >> } >> >> +static inline s16 >> +bpf_ctx_narrow_access_offset(size_t variable_size, size_t access_size) >> +{ >> +#ifdef __LITTLE_ENDIAN >> + return 0; >> +#else >> + return variable_size - access_size; >> +#endif >> +} > > The change looks correct to me. 
> But now in include/linux/filter.h we have two inline functions: > > static inline u8 > bpf_ctx_narrow_load_shift(u32 off, u32 size, u32 size_default) > { > u8 load_off = off & (size_default - 1); > > #ifdef __LITTLE_ENDIAN > return load_off * 8; > #else > return (size_default - (load_off + size)) * 8; > #endif > } > > static inline s16 > bpf_ctx_narrow_access_offset(size_t variable_size, size_t access_size) > { > #ifdef __LITTLE_ENDIAN > return 0; > #else > return variable_size - access_size; > #endif > } > > It would be good if we can have ifdef __LITTLE_ENDIAN only in one place. > How about something like below: > > static inline u8 > bpf_ctx_narrow_access_offset(u32 off, u32 size, u32 size_default) > { > u8 access_off = off & (size_default - 1); > > #ifdef __LITTLE_ENDIAN > return access_off; > #else > return size_default - (access_off + size); > #endif > } > > static inline u8 > bpf_ctx_narrow_load_shift(u32 off, u32 size, u32 size_default) > { > return bpf_ctx_narrow_access_offset(off, size, size_default) * 8; > } This does indeed look better, thanks! In this case, we don't even need bpf_ctx_narrow_load_shift() anymore, since doing u8 shift = bpf_ctx_narrow_access_offset( off, size, size_default) * 8; directly is quite readable. I will test and send a v2.
diff --git a/include/linux/filter.h b/include/linux/filter.h index 92c6e31fb008..94e81c56d81c 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -760,6 +760,16 @@ bpf_ctx_narrow_load_shift(u32 off, u32 size, u32 size_default) #endif } +static inline s16 +bpf_ctx_narrow_access_offset(size_t variable_size, size_t access_size) +{ +#ifdef __LITTLE_ENDIAN + return 0; +#else + return variable_size - access_size; +#endif +} + #define bpf_ctx_wide_access_ok(off, size, type, field) \ (size == sizeof(__u64) && \ off >= offsetof(type, field) && \ diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 0a00eaca6fae..b835fbb13ea8 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1356,7 +1356,9 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type, treg, si->dst_reg, offsetof(struct bpf_sysctl_kern, ppos)); *insn++ = BPF_STX_MEM( - BPF_SIZEOF(u32), treg, si->src_reg, 0); + BPF_SIZEOF(u32), treg, si->src_reg, + bpf_ctx_narrow_access_offset( + sizeof(loff_t), sizeof(u32))); *insn++ = BPF_LDX_MEM( BPF_DW, treg, si->dst_reg, offsetof(struct bpf_sysctl_kern, tmp_reg)); @@ -1366,7 +1368,10 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type, si->dst_reg, si->src_reg, offsetof(struct bpf_sysctl_kern, ppos)); *insn++ = BPF_LDX_MEM( - BPF_SIZE(si->code), si->dst_reg, si->dst_reg, 0); + BPF_SIZE(si->code), si->dst_reg, si->dst_reg, + bpf_ctx_narrow_access_offset( + sizeof(loff_t), + bpf_size_to_bytes(BPF_SIZE(si->code)))); } *target_size = sizeof(u32); break; diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c index a3bebd7c68dd..abc26248a7f1 100644 --- a/tools/testing/selftests/bpf/test_sysctl.c +++ b/tools/testing/selftests/bpf/test_sysctl.c @@ -31,6 +31,7 @@ struct sysctl_test { enum bpf_attach_type attach_type; const char *sysctl; int open_flags; + int seek; const char *newval; const char *oldval; enum { @@ -139,7 +140,7 @@ static struct sysctl_test tests[] = { /* If (file_pos 
== X) */ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, offsetof(struct bpf_sysctl, file_pos)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 3, 2), /* return ALLOW; */ BPF_MOV64_IMM(BPF_REG_0, 1), @@ -152,6 +153,7 @@ static struct sysctl_test tests[] = { .attach_type = BPF_CGROUP_SYSCTL, .sysctl = "kernel/ostype", .open_flags = O_RDONLY, + .seek = 3, .result = SUCCESS, }, { @@ -1442,6 +1444,11 @@ static int access_sysctl(const char *sysctl_path, if (fd < 0) return fd; + if (test->seek && lseek(fd, test->seek, SEEK_SET) == -1) { + log_err("lseek(%d) failed", test->seek); + goto err; + } + if (test->open_flags == O_RDONLY) { char buf[128];
"ctx:file_pos sysctl:read write ok" fails on s390 with "Read value != nux". This is because verifier rewrites a complete 32-bit bpf_sysctl.file_pos update to a partial update of the first 32 bits of 64-bit *bpf_sysctl_kern.ppos, which is not correct on big-endian systems. Fix by using an offset on big-endian systems. Ditto for bpf_sysctl.file_pos reads. Currently the test does not detect a problem there, since it expects to see 0, which it gets with high probability in error cases, so change it to seek to offset 3 and expect 3 in bpf_sysctl.file_pos. Fixes: e1550bfe0de4 ("bpf: Add file_pos field to bpf_sysctl ctx") Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com> --- include/linux/filter.h | 10 ++++++++++ kernel/bpf/cgroup.c | 9 +++++++-- tools/testing/selftests/bpf/test_sysctl.c | 9 ++++++++- 3 files changed, 25 insertions(+), 3 deletions(-)