
[v6,bpf-next,01/17] bpf: verifier: offer more accurate helper function arg and return type

Message ID 1556880164-10689-2-git-send-email-jiong.wang@netronome.com
State Changes Requested
Delegated to: BPF Maintainers
Series: bpf: eliminate zero extensions for sub-register writes

Commit Message

Jiong Wang May 3, 2019, 10:42 a.m. UTC
A BPF helper call transfers execution from eBPF insns to native functions,
while the verifier's insn walker only walks eBPF insns. So the verifier can
only know argument and return value types from explicit helper function
prototype descriptions.

For 32-bit optimization, it is important to know whether an argument (register
use from an eBPF insn) and the return value (register define from the external
function) are 32-bit or 64-bit, so the corresponding registers can be
zero-extended correctly.

Arguments are register uses; we conservatively treat all of them as 64-bit
by default, while the following new bpf_arg_type values are added so we can
start to mark frequently used helper functions with more accurate argument
types.

  ARG_CONST_SIZE32
  ARG_CONST_SIZE32_OR_ZERO
  ARG_ANYTHING32

A few helper functions that show up frequently inside Cilium bpf programs are
updated to use these new types.

Return values are register defs; we need to know their accurate width for
correct zero extension. Given that most helper functions returning integers
return a 32-bit value, a new RET_INTEGER64 is added to mark those functions
that return a 64-bit value. All related helper functions are updated.
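
For example, two of the conversions (a condensed sketch of hunks from the
diff below; bpf_ktime_get_ns returns a full 64-bit ns value and bpf_tail_call
only reads its index argument as 32-bit):

  const struct bpf_func_proto bpf_ktime_get_ns_proto = {
  	.func		= bpf_ktime_get_ns,
  	.gpl_only	= true,
  	.ret_type	= RET_INTEGER64,	/* 64-bit ns value */
  };

  const struct bpf_func_proto bpf_tail_call_proto = {
  	...
  	.arg3_type	= ARG_ANYTHING32,	/* index only read as 32-bit */
  };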

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
---
 include/linux/bpf.h      |  6 +++++-
 kernel/bpf/core.c        |  2 +-
 kernel/bpf/helpers.c     | 10 +++++-----
 kernel/bpf/verifier.c    | 15 ++++++++++-----
 kernel/trace/bpf_trace.c |  4 ++--
 net/core/filter.c        | 38 +++++++++++++++++++-------------------
 6 files changed, 42 insertions(+), 33 deletions(-)

Comments

Daniel Borkmann May 6, 2019, 1:57 p.m. UTC | #1
On 05/03/2019 12:42 PM, Jiong Wang wrote:
> BPF helper call transfers execution from eBPF insns to native functions
> while verifier insn walker only walks eBPF insns. So, verifier can only
> knows argument and return value types from explicit helper function
> prototype descriptions.
> 
> For 32-bit optimization, it is important to know whether argument (register
> use from eBPF insn) and return value (register define from external
> function) is 32-bit or 64-bit, so corresponding registers could be
> zero-extended correctly.
> 
> For arguments, they are register uses, we conservatively treat all of them
> as 64-bit at default, while the following new bpf_arg_type are added so we
> could start to mark those frequently used helper functions with more
> accurate argument type.
> 
>   ARG_CONST_SIZE32
>   ARG_CONST_SIZE32_OR_ZERO

For the above two, I was wondering: is there a case where the passed size is
not used as 32 bit, aka couldn't we generally assume 32 bit here w/o adding
these two extra arg types? ARG_ANYTHING32 and RET_INTEGER64 definitely
make sense (btw, an opt-in value like RET_INTEGER32 might have been easier
for reviewing the converted helpers).

>   ARG_ANYTHING32
> 
> A few helper functions shown up frequently inside Cilium bpf program are
> updated using these new types.
> 
> For return values, they are register defs, we need to know accurate width
> for correct zero extensions. Given most of the helper functions returning
> integers return 32-bit value, a new RET_INTEGER64 is added to make those
> functions return 64-bit value. All related helper functions are updated.
> 
> Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
[...]

> @@ -2003,9 +2003,9 @@ static const struct bpf_func_proto bpf_csum_diff_proto = {
>  	.pkt_access	= true,
>  	.ret_type	= RET_INTEGER,
>  	.arg1_type	= ARG_PTR_TO_MEM_OR_NULL,
> -	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
> +	.arg2_type	= ARG_CONST_SIZE32_OR_ZERO,
>  	.arg3_type	= ARG_PTR_TO_MEM_OR_NULL,
> -	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
> +	.arg4_type	= ARG_CONST_SIZE32_OR_ZERO,
>  	.arg5_type	= ARG_ANYTHING,
>  };

I noticed that the above and also bpf_csum_update() would need to be converted
to RET_INTEGER64 as they would break otherwise: they return an error but also
a u32 csum value, so the use for error checking would be s64 ret = bpf_csum_xyz(...).
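
In BPF program C code that pattern would look roughly like this (a
hypothetical caller sketch; only the s64 return handling is the point):

  __be32 from[1], to[1];
  __wsum seed = 0, csum;
  s64 ret;

  ret = bpf_csum_diff(from, sizeof(from), to, sizeof(to), seed);
  if (ret < 0)		/* negative errno only visible in the full 64 bits */
  	return TC_ACT_SHOT;
  csum = (__wsum)ret;	/* otherwise the low 32 bits hold the checksum */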

Thanks,
Daniel
Alexei Starovoitov May 6, 2019, 3:50 p.m. UTC | #2
On Fri, May 03, 2019 at 11:42:28AM +0100, Jiong Wang wrote:
> BPF helper call transfers execution from eBPF insns to native functions
> while verifier insn walker only walks eBPF insns. So, verifier can only
> knows argument and return value types from explicit helper function
> prototype descriptions.
> 
> For 32-bit optimization, it is important to know whether argument (register
> use from eBPF insn) and return value (register define from external
> function) is 32-bit or 64-bit, so corresponding registers could be
> zero-extended correctly.
> 
> For arguments, they are register uses, we conservatively treat all of them
> as 64-bit at default, while the following new bpf_arg_type are added so we
> could start to mark those frequently used helper functions with more
> accurate argument type.
> 
>   ARG_CONST_SIZE32
>   ARG_CONST_SIZE32_OR_ZERO
>   ARG_ANYTHING32
> 
> A few helper functions shown up frequently inside Cilium bpf program are
> updated using these new types.
> 
> For return values, they are register defs, we need to know accurate width
> for correct zero extensions. Given most of the helper functions returning
> integers return 32-bit value, a new RET_INTEGER64 is added to make those
> functions return 64-bit value. All related helper functions are updated.
> 
> Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
> ---
>  include/linux/bpf.h      |  6 +++++-
>  kernel/bpf/core.c        |  2 +-
>  kernel/bpf/helpers.c     | 10 +++++-----
>  kernel/bpf/verifier.c    | 15 ++++++++++-----
>  kernel/trace/bpf_trace.c |  4 ++--
>  net/core/filter.c        | 38 +++++++++++++++++++-------------------
>  6 files changed, 42 insertions(+), 33 deletions(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 9a21848..11a5fb9 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -198,9 +198,12 @@ enum bpf_arg_type {
>  
>  	ARG_CONST_SIZE,		/* number of bytes accessed from memory */
>  	ARG_CONST_SIZE_OR_ZERO,	/* number of bytes accessed from memory or 0 */
> +	ARG_CONST_SIZE32,	/* Likewise, but size fits into 32-bit */
> +	ARG_CONST_SIZE32_OR_ZERO,	/* Ditto */

these two should not be necessary. The program must pass a constant into
the helper. The verifier is already smart enough to see it through.
Looks like patch 2 is using them as a band-aid.

>  	ARG_PTR_TO_CTX,		/* pointer to context */
>  	ARG_ANYTHING,		/* any (initialized) argument is ok */
> +	ARG_ANYTHING32,		/* Likewise, but it is a 32-bit argument */

Such annotation has subtle semantics that I don't think we've explored
in the past and I don't see from commit logs of this patch set that
the necessary analysis was done.
In particular 'int' in the helper argument does not mean that the verifier
will reject 64-bit values. It also doesn't mean that the helper
will reject them at run-time. In most cases it's a silent truncation.
Like bpf_tail_call will accept a 64-bit value and will cast it to u32
before doing the max_entries bounds check.
In other cases it could be signed vs unsigned interpretation inside
the helper.
imo the code base is not ready for semi-automatic remarking of
helpers with ARG_ANYTHING32 when a helper is accepting a 32-bit value.
Definitely not something short term and not a prerequisite for this set.

Longer term... if we introduce ARG_ANYTHING32, what would that mean?
Would the verifier insert zext before calling the helper automatically,
so we can drop the truncation in the helper? May be useful?
What about passing a negative value?
ld_imm64 r2, -1
call foo
is a valid program.

>  	ARG_PTR_TO_SPIN_LOCK,	/* pointer to bpf_spin_lock */
>  	ARG_PTR_TO_SOCK_COMMON,	/* pointer to sock_common */
>  	ARG_PTR_TO_INT,		/* pointer to int */
> @@ -210,7 +213,8 @@ enum bpf_arg_type {
>  
>  /* type of values returned from helper functions */
>  enum bpf_return_type {
> -	RET_INTEGER,			/* function returns integer */
> +	RET_INTEGER,			/* function returns 32-bit integer */
> +	RET_INTEGER64,			/* function returns 64-bit integer */

These types of annotations are dangerous too, since they don't consider the sign
of the return value. In the BPF ISA all arguments and return values are 64-bit.
When it's a full 64-bit value we don't need to specify the sign, since the sign
bit will be interpreted by the program and the verifier doesn't need to worry.
If we say that a helper returns 32-bit, we need to state whether it's signed or not
for the verifier to analyze it properly.

I think it's best to drop this patch. I don't think the rest of the set
really needs it. It looks to me like a last-minute optimization that I really
wish wasn't there, because the rest we've discussed in depth and the set
was practically ready to land, but now bpf-next is closed.
Please resubmit after it reopens.
Jiong Wang May 6, 2019, 10:25 p.m. UTC | #3
Daniel Borkmann writes:

> On 05/03/2019 12:42 PM, Jiong Wang wrote:
>> BPF helper call transfers execution from eBPF insns to native functions
>> while verifier insn walker only walks eBPF insns. So, verifier can only
>> knows argument and return value types from explicit helper function
>> prototype descriptions.
>> 
>> For 32-bit optimization, it is important to know whether argument (register
>> use from eBPF insn) and return value (register define from external
>> function) is 32-bit or 64-bit, so corresponding registers could be
>> zero-extended correctly.
>> 
>> For arguments, they are register uses, we conservatively treat all of them
>> as 64-bit at default, while the following new bpf_arg_type are added so we
>> could start to mark those frequently used helper functions with more
>> accurate argument type.
>> 
>>   ARG_CONST_SIZE32
>>   ARG_CONST_SIZE32_OR_ZERO
>
> For the above two, I was wondering is there a case where the passed size is
> not used as 32 bit aka couldn't we generally assume 32 bit here w/o adding
> these two extra arg types?

I will give a detailed reply tomorrow. IIRC there was. I was benchmarking
bpf_lxc and found it contains quite a few helper calls which generate a
fair percentage of unnecessary zext on parameters.

> For ARG_ANYTHING32 and RET_INTEGER64 definitely
> makes sense (btw, opt-in value like RET_INTEGER32 might have been easier for
> reviewing converted helpers).
>
>>   ARG_ANYTHING32
>> 
>> A few helper functions shown up frequently inside Cilium bpf program are
>> updated using these new types.
>> 
>> For return values, they are register defs, we need to know accurate width
>> for correct zero extensions. Given most of the helper functions returning
>> integers return 32-bit value, a new RET_INTEGER64 is added to make those
>> functions return 64-bit value. All related helper functions are updated.
>> 
>> Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
> [...]
>
>> @@ -2003,9 +2003,9 @@ static const struct bpf_func_proto bpf_csum_diff_proto = {
>>  	.pkt_access	= true,
>>  	.ret_type	= RET_INTEGER,
>>  	.arg1_type	= ARG_PTR_TO_MEM_OR_NULL,
>> -	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
>> +	.arg2_type	= ARG_CONST_SIZE32_OR_ZERO,
>>  	.arg3_type	= ARG_PTR_TO_MEM_OR_NULL,
>> -	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
>> +	.arg4_type	= ARG_CONST_SIZE32_OR_ZERO,
>>  	.arg5_type	= ARG_ANYTHING,
>>  };
>
> I noticed that the above and also bpf_csum_update() would need to be converted
> to RET_INTEGER64 as they would break otherwise: it's returning error but also
> u32 csum value, so use for error checking would be s64 ret =
> bpf_csum_xyz(...).

Ack.

(I did search ^u64 inside the uapi header; I should also search ^s64, will
double-check all changes)

>
> Thanks,
> Daniel
Jiong Wang May 8, 2019, 11:12 a.m. UTC | #4
Jiong Wang writes:

> Daniel Borkmann writes:
>
>> On 05/03/2019 12:42 PM, Jiong Wang wrote:
>>> BPF helper call transfers execution from eBPF insns to native functions
>>> while verifier insn walker only walks eBPF insns. So, verifier can only
>>> knows argument and return value types from explicit helper function
>>> prototype descriptions.
>>> 
>>> For 32-bit optimization, it is important to know whether argument (register
>>> use from eBPF insn) and return value (register define from external
>>> function) is 32-bit or 64-bit, so corresponding registers could be
>>> zero-extended correctly.
>>> 
>>> For arguments, they are register uses, we conservatively treat all of them
>>> as 64-bit at default, while the following new bpf_arg_type are added so we
>>> could start to mark those frequently used helper functions with more
>>> accurate argument type.
>>> 
>>>   ARG_CONST_SIZE32
>>>   ARG_CONST_SIZE32_OR_ZERO
>>
>> For the above two, I was wondering is there a case where the passed size is
>> not used as 32 bit aka couldn't we generally assume 32 bit here w/o adding
>> these two extra arg types?
>
> Will give a detailed reply tomorrow. IIRC there was.

"bpf_perf_event_output" etc inside kernel/trace/bpf_trace.c. They are using
ARG_CONST_SIZE_OR_ZERO for "u64 size" which should have been a mistake,
because "size" parameter for bpf_perf_event_output is used to initialize
the same field inside struct perf_raw_record which is u32. This lead me
thinking people might use in-accurate arg type description.

I was keeping the original ARG_CONST_SIZE/OR_ZERO with their 64-bit meaning by
default, mostly because I think it is safer. If we assume
ARG_CONST_SIZE/OR_ZERO are 32-bit by default, we must check all helper
functions to make sure their arg types are correct, and make sure all future
helpers have correct arg types as well. Otherwise, if a helper function has a
u64 arg whose value comes from a u32, forgetting to use a new
ARG_CONST_SIZE64 would cause "val" not to be zero extended, and that would be
a correctness issue.

  u32 val
  helper_call((u64)val)

Instead, if we assume the existing ARG_CONST_SIZE/OR_ZERO are u64, that just
introduces redundant zext but no correctness issue.
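
As a sketch in the same pseudo-asm used elsewhere in this thread (the exact
zext sequence the verifier would emit is an assumption), the 64-bit default
at worst costs a redundant shift pair:

  w2 = *(u32 *)(r1 + 0)   /* sub-register define of the size */
  r2 <<= 32               /* zext inserted because the arg is treated as 64-bit */
  r2 >>= 32               /* redundant if the helper only reads the low 32 bits */
  call helper

whereas wrongly assuming 32-bit would omit the two shifts and leave the high
bits undefined when the helper does read all 64 bits.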

Regards,
Jiong

>> For ARG_ANYTHING32 and RET_INTEGER64 definitely
>> makes sense (btw, opt-in value like RET_INTEGER32 might have been easier for
>> reviewing converted helpers

>>> A few helper functions shown up frequently inside Cilium bpf program are
>>> updated using these new types.
>>> 
>>> For return values, they are register defs, we need to know accurate width
>>> for correct zero extensions. Given most of the helper functions returning
>>> integers return 32-bit value, a new RET_INTEGER64 is added to make those
>>> functions return 64-bit value. All related helper functions are updated.
>>> 
>>> Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
>> [...]
>>
>>> @@ -2003,9 +2003,9 @@ static const struct bpf_func_proto bpf_csum_diff_proto = {
>>>  	.pkt_access	= true,
>>>  	.ret_type	= RET_INTEGER,
>>>  	.arg1_type	= ARG_PTR_TO_MEM_OR_NULL,
>>> -	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
>>> +	.arg2_type	= ARG_CONST_SIZE32_OR_ZERO,
>>>  	.arg3_type	= ARG_PTR_TO_MEM_OR_NULL,
>>> -	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
>>> +	.arg4_type	= ARG_CONST_SIZE32_OR_ZERO,
>>>  	.arg5_type	= ARG_ANYTHING,
>>>  };
>>
>> I noticed that the above and also bpf_csum_update() would need to be converted
>> to RET_INTEGER64 as they would break otherwise: it's returning error but also
>> u32 csum value, so use for error checking would be s64 ret =
>> bpf_csum_xyz(...).
>
> Ack.
>
> (I did searched ^u64 inside upai header, should also search ^s64, will
> double-check all changes)
>
>>
>> Thanks,
>> Daniel
Jiong Wang May 8, 2019, 2:45 p.m. UTC | #5
Alexei Starovoitov writes:

> On Fri, May 03, 2019 at 11:42:28AM +0100, Jiong Wang wrote:
>> BPF helper call transfers execution from eBPF insns to native functions
>> while verifier insn walker only walks eBPF insns. So, verifier can only
>> knows argument and return value types from explicit helper function
>> prototype descriptions.
>> 
>> For 32-bit optimization, it is important to know whether argument (register
>> use from eBPF insn) and return value (register define from external
>> function) is 32-bit or 64-bit, so corresponding registers could be
>> zero-extended correctly.
>> 
>> For arguments, they are register uses, we conservatively treat all of them
>> as 64-bit at default, while the following new bpf_arg_type are added so we
>> could start to mark those frequently used helper functions with more
>> accurate argument type.
>> 
>>   ARG_CONST_SIZE32
>>   ARG_CONST_SIZE32_OR_ZERO
>>   ARG_ANYTHING32
>> 
>> A few helper functions shown up frequently inside Cilium bpf program are
>> updated using these new types.
>> 
>> For return values, they are register defs, we need to know accurate width
>> for correct zero extensions. Given most of the helper functions returning
>> integers return 32-bit value, a new RET_INTEGER64 is added to make those
>> functions return 64-bit value. All related helper functions are updated.
>> 
>> Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
>> ---
>>  include/linux/bpf.h      |  6 +++++-
>>  kernel/bpf/core.c        |  2 +-
>>  kernel/bpf/helpers.c     | 10 +++++-----
>>  kernel/bpf/verifier.c    | 15 ++++++++++-----
>>  kernel/trace/bpf_trace.c |  4 ++--
>>  net/core/filter.c        | 38 +++++++++++++++++++-------------------
>>  6 files changed, 42 insertions(+), 33 deletions(-)
>> 
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index 9a21848..11a5fb9 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -198,9 +198,12 @@ enum bpf_arg_type {
>>  
>>  	ARG_CONST_SIZE,		/* number of bytes accessed from memory */
>>  	ARG_CONST_SIZE_OR_ZERO,	/* number of bytes accessed from memory or 0 */
>> +	ARG_CONST_SIZE32,	/* Likewise, but size fits into 32-bit */
>> +	ARG_CONST_SIZE32_OR_ZERO,	/* Ditto */
>
> these two should not be necessary. The program must pass constant into
> the helper. The verifier is smart enough already to see it through.
> Looks like patch 2 is using it as a band-aid.
>
>>  	ARG_PTR_TO_CTX,		/* pointer to context */
>>  	ARG_ANYTHING,		/* any (initialized) argument is ok */
>> +	ARG_ANYTHING32,		/* Likewise, but it is a 32-bit argument */
>
> Such annotation has subtle semantics that I don't think we've explored
> in the past and I don't see from commit logs of this patch set that
> the necessary analysis was done.
> In particular 'int' in the helper argument does not mean that the verifier
> will reject 64-bit values. It also doesn't mean that the helper
> will reject them at run-time. In most cases it's a silent truncation.
> Like bpf_tail_call will accept 64-bit value, and will cast it to u32
> before doing max_entries bounds check.
> In other cases it could be signed vs unsigned interpretation inside
> the helper.

I might be misunderstanding your points, please just shout if I am wrong.

Suppose the following BPF code:

  unsigned helper(unsigned long long, unsigned long long);
  unsigned long long test(unsigned *a, unsigned int c)
  {
    unsigned int b = *a;
    c += 10;
    return helper(b, c);
  }

We get the following instruction sequence with the latest llvm
(-O2 -mattr=+alu32 -mcpu=v3)

  test:
    1: w1 = *(u32 *)(r1 + 0)
    2: w2 += 10
    3: call helper
    4: exit

Argument Types
===
Now instructions 1 and 2 are sub-register defines, and instruction 3, the
call, uses them implicitly.

Without the introduction of the new ARG_CONST_SIZE32 and
ARG_CONST_SIZE32_OR_ZERO, we don't know what should be done with w1 and
w2; zero-extending them would be fine for all cases, but could result in a
few unnecessary zero-extensions being inserted.

By introducing the new types, we know both arguments are u64, so we should
zero-extend them.

This sort of comes down to how the high 32-bit zero extension semantics are
exploited by the compiler or by hand-written assembly.

The LLVM compiler will exploit the ISA zext semantics in quite a few
cases:

  1. Narrow loads are naturally handled by an instruction combine pattern,
  things like:

  def : Pat<(i64 (zextloadi32 ADDRri:$src)),
              (SUBREG_TO_REG (i64 0), (LDW32 ADDRri:$src), sub_32)>;

  2. For ALUs, there is a BPF peephole pass trying to do it; the sequence for
  preparing argument 2 would have been:
  
     w2 = 16
     r2 <<= 32
     r2 >>= 32
     
  But the peephole pass knows the two shifts are shifting a value that comes
  from a sub-register MOV, so it is safe to remove the two redundant shifts.

So, finally, the BPF instruction sequence we get is the one shown earlier;
without the new argument types, the verifier doesn't know how to deal with
zext.

A static compiler has header files which contain function prototypes/C types
to guide code-gen. IMHO, the verifier needs the same thing, which is the BPF
helper function prototype description.

And that is why I introduced these new argument types; without them, there
could be more than 10% extra zext inserted on benchmarks like bpf_lxc.

Return Types
===
If we change the testcase a little bit by introducing one new function,
helper1:

  unsigned helper(unsigned long long, unsigned long long);
  unsigned long long helper1(unsigned long long);

  unsigned long long test(unsigned *a, unsigned int c)
  {
    unsigned int b = *a;
    c += 10;
    return helper1(helper(b, c));
  }

Code-gen for -O2 -mattr=+alu32 -mcpu=v3 will be:

  test:
    1: w1 = *(u32 *)(r1 + 0)
    2: w2 += 10
    3: call helper
    4: r1 = r0
    5: call helper1
    6: exit

The return value of "helper" is u32 and is used directly as a u64 arg for
helper1. For a bpf-to-bpf call, I think this is correct, because the value
definition happens inside the bpf program, and the 32-bit opt pass does pass
the definition information from callee to caller at exit, so the sub-register
define instruction will be marked and the register will be zero extended;
there is no problem using it directly as 64-bit for helper1.

But helper functions are implemented as native code which may not follow
this convention. For example, on arm32, calling a helper function is just a
jump to native code. And if the helper returns u32, it just sets r0, with no
clearing of r1, which holds the high 32 bits in the register pair modeling
eBPF R0.

Inside the verifier, we assume helper return values are all 64-bit. Without
the new return types, there won't be zero extension of a u32 value if the
32-bit JIT back-ends haven't done the correct job of clearing the high 32
bits of the return value when the helper is declared as a 64-bit type. This
requires JIT back-ends to check helper function prototypes and do code-gen
accordingly. Maybe it is too strict to ask JIT back-ends to do this; should
we just let the compiler always generate an extra zero extension if a 32-bit
return value comes from an external helper function?

> imo the code base is not ready for semi-automatic remarking of
> helpers with ARG_ANYTHING32 when helper is accepting 32-bit value.
> Definitely not something short term and not a prerequisite for this set.
>
> Longer term... if we introduce ARG_ANYTHING32, what would that mean?

At the moment I am thinking ARG_ANYTHING32 just means that only the low
32 bits of the value are read.

> Would the verifier insert zext before calling the helper automatically
> and we can drop truncation in the helper? May be useful?

Yes, once the 32-bit opt is enabled, JIT back-ends don't need to generate
high 32-bit clearing instructions if an instruction is not marked. The
verifier is responsible for inserting zext before calling the helper if the
definition of an argument is a sub-register define and the argument's usage
is 64-bit.

> What about passing negative value ?
> ld_imm64 r2, -1
> call foo
> is a valid program.

ld_imm64 is a 64-bit define, so the 32-bit opt pass won't touch the sequence.

>
>>  	ARG_PTR_TO_SPIN_LOCK,	/* pointer to bpf_spin_lock */
>>  	ARG_PTR_TO_SOCK_COMMON,	/* pointer to sock_common */
>>  	ARG_PTR_TO_INT,		/* pointer to int */
>> @@ -210,7 +213,8 @@ enum bpf_arg_type {
>>  
>>  /* type of values returned from helper functions */
>>  enum bpf_return_type {
>> -	RET_INTEGER,			/* function returns integer */
>> +	RET_INTEGER,			/* function returns 32-bit integer */
>> +	RET_INTEGER64,			/* function returns 64-bit integer */
>
> These type of annotations are dangerous too since they don't consider sign
> of the return value. In BPF ISA all arguments and return values are 64-bit.
> When it's full 64-bit we don't need to specify the sign, since sing-bit
> will be interpreted by the program and the verifier doesn't need to worry.
> If we say that helper returns 32-bit we need state whether it's signed or not
> for the verifier to analyze it properly.

I feel signed or unsigned will just work, for example:

  s32 helper(u64, u64);
  s64 helper1(u64);

  s64 test(u32 *a, u32 c)
  {
    u32 b = *a;
    c += 10;
    return helper1(helper(b, c));
  }

  test:
    w1 = *(u32 *)(r1 + 0)
    w2 += 10
    call helper
    r1 = r0
    r1 <<= 32
    r1 s>>= 32
    call helper1
    exit
    
"helper" is RET_INTEGER which means returning 32-bit, and helper1 is
RET_INTEGER64. LLVM compiler should have generated explicit signed
extension sequence like above to sign extend return value of "helper". And
because is is RET_INTEGER and because there is later 64-bit use in the
following "r1 = r0" move, it is fine to insert zero extension after the
call, the later signed extension sequence will still work.

Regards,
Jiong

> I think it's the best to drop this patch. I don't think the rest of the set
> really needs it. It looks to me as a last minute optimization that I really
> wish wasn't there, because the rest we've discussed in depth and the set
> was practically ready to land, but now bpf-next is closed.
> Please resubmit after it reopens.
Alexei Starovoitov May 8, 2019, 5:51 p.m. UTC | #6
On Wed, May 08, 2019 at 03:45:12PM +0100, Jiong Wang wrote:
> 
> I might be misunderstanding your points, please just shout if I am wrong.
> 
> Suppose the following BPF code:
> 
>   unsigned helper(unsigned long long, unsigned long long);
>   unsigned long long test(unsigned *a, unsigned int c)
>   {
>     unsigned int b = *a;
>     c += 10;
>     return helper(b, c);
>   }
> 
> We get the following instruction sequence by latest llvm
> (-O2 -mattr=+alu32 -mcpu=v3)
> 
>   test:
>     1: w1 = *(u32 *)(r1 + 0)
>     2: w2 += 10
>     3: call helper
>     4: exit
> 
> Argument Types
> ===
> Now instruction 1 and 2 are sub-register defines, and instruction 3, the
> call, use them implicitly.
> 
> Without the introduction of the new ARG_CONST_SIZE32 and
> ARG_CONST_SIZE32_OR_ZERO, we don't know what should be done with w1 and
> w2, zero-extend them should be fine for all cases, but could resulting in a
> few unnecessary zero-extension inserted.

I don't think we're on the same page.
The argument type is _const_.
In the example above they are not _const_.

> 
> And that why I introduce these new argument types, without them, there
> could be more than 10% extra zext inserted on benchmarks like bpf_lxc.

10% extra? So be it.
We're talking past each other here.
I agree with your optimization goal, but I think you're missing
the safety concerns I'm trying to explain.

> 
> But for helper functions, they are done by native code which may not follow
> this convention. For example, on arm32, calling helper functions are just
> jump to and execute native code. And if the helper returns u32, it just set
> r0, no clearing of r1 which is the high 32-bit in the register pair
> modeling eBPF R0.

it's an arm32 bug then. All helpers _must_ return 64-bit back to the bpf prog
and _must_ accept 64-bit from the bpf prog.
Jiong Wang May 9, 2019, 12:32 p.m. UTC | #7
Alexei Starovoitov writes:

> On Wed, May 08, 2019 at 03:45:12PM +0100, Jiong Wang wrote:
>> 
>> I might be misunderstanding your points, please just shout if I am wrong.
>> 
>> Suppose the following BPF code:
>> 
>>   unsigned helper(unsigned long long, unsigned long long);
>>   unsigned long long test(unsigned *a, unsigned int c)
>>   {
>>     unsigned int b = *a;
>>     c += 10;
>>     return helper(b, c);
>>   }
>> 
>> We get the following instruction sequence by latest llvm
>> (-O2 -mattr=+alu32 -mcpu=v3)
>> 
>>   test:
>>     1: w1 = *(u32 *)(r1 + 0)
>>     2: w2 += 10
>>     3: call helper
>>     4: exit
>> 
>> Argument Types
>> ===
>> Now instruction 1 and 2 are sub-register defines, and instruction 3, the
>> call, use them implicitly.
>> 
>> Without the introduction of the new ARG_CONST_SIZE32 and
>> ARG_CONST_SIZE32_OR_ZERO, we don't know what should be done with w1 and
>> w2, zero-extend them should be fine for all cases, but could resulting in a
>> few unnecessary zero-extension inserted.
>
> I don't think we're on the same page.
> The argument type is _const_.
> In the example above they are not _const_.

Right, I have read check_func_arg + check_helper_mem_access again.

Looks like ARG_CONST_SIZE* are designed for describing the memory access size
for things like bounds checking. The size must be a constant for stack access,
otherwise the prog will be rejected, but it looks to me like variables are
allowed for pkt/map access.

But pkt/map access has extra range info. So, indeed, ARG_CONST_SIZE32* are
unnecessary; the width can be figured out through the range.

Will just drop this patch in next version.

And sorry for repeating it again, but I am still concerned about the issue
described at https://www.spinics.net/lists/netdev/msg568678.html.

To put it simply, zext insertion is based on the eBPF ISA and assumes all
sub-register defines from alu32 or narrow loads need it if the underlying
hardware arch doesn't do it. However, some arches support hardware zext
partially. For example, PowerPC, SPARC etc. are 64-bit arches; while they
don't do hardware zext on alu32, they do it for narrow loads. And RISC-V is
even more special: some alu32 instructions have hardware zext, some don't.

At the moment we have a single back-end hook, "bpf_jit_hardware_zext"; once a
back-end enables it, the verifier just inserts zero extension for all
identified alu32 and narrow loads.

Given that verifier analysis info is not pushed down to JIT back-ends, the
verifier needs more back-end info pushed up from the back-ends. Do you think
it makes sense to introduce another hook, "bpf_jit_hardware_zext_narrow_load",
to at least prevent unnecessary zext being inserted for narrow loads on arches
like PowerPC and SPARC?

The hooks to control verifier zext insertion then become two:

  bpf_jit_hardware_zext_alu32
  bpf_jit_hardware_zext_narrow_load

>> And that why I introduce these new argument types, without them, there
>> could be more than 10% extra zext inserted on benchmarks like bpf_lxc.
>
> 10% extra ? so be it.
> We're talking past each other here.
> I agree with your optimization goal, but I think you're missing
> the safety concerns I'm trying to explain.
>> But for helper functions, they are done by native code which may not follow
>> this convention. For example, on arm32, calling helper functions are just
>> jump to and execute native code. And if the helper returns u32, it just set
>> r0, no clearing of r1 which is the high 32-bit in the register pair
>> modeling eBPF R0.
>
> it's arm32 bug then. All helpers _must_ return 64-bit back to bpf prog
> and _must_ accept 64-bit from bpf prog.
Jiong Wang May 9, 2019, 5:31 p.m. UTC | #8
Jiong Wang writes:

<snip>

> At the moment we have single backend hook "bpf_jit_hardware_zext", once a
> backend enable it, verifier just insert zero extension for all identified
> alu32 and narrow loads.
>
> Given verifier analysis info is not pushed down to JIT back-ends, verifier
> needs more back-end info pushed up from back-ends. Do you think make sense
> to introduce another hook "bpf_jit_hardware_zext_narrow_load"

Maybe just keep the current "bpf_jit_hardware_zext", but let it return an
int/enum instead of bool. Then the verifier could know the hardware's ability
through the enum value?


> to at least
> prevent unnecessary zext inserted for narrowed loads for arches like
> PowerPC, SPARC?
>
> The hooks to control verifier zext insertion then becomes two:
>
>   bpf_jit_hardware_zext_alu32
>   bpf_jit_hardware_zext_narrow_load
>
>>> And that why I introduce these new argument types, without them, there
>>> could be more than 10% extra zext inserted on benchmarks like bpf_lxc.
>>
>> 10% extra ? so be it.
>> We're talking past each other here.
>> I agree with your optimization goal, but I think you're missing
>> the safety concerns I'm trying to explain.
>>> But for helper functions, they are done by native code which may not follow
>>> this convention. For example, on arm32, calling helper functions are just
>>> jump to and execute native code. And if the helper returns u32, it just set
>>> r0, no clearing of r1 which is the high 32-bit in the register pair
>>> modeling eBPF R0.
>>
>> it's arm32 bug then. All helpers _must_ return 64-bit back to bpf prog
>> and _must_ accept 64-bit from bpf prog.
Alexei Starovoitov May 10, 2019, 1:53 a.m. UTC | #9
On Thu, May 09, 2019 at 01:32:30PM +0100, Jiong Wang wrote:
> 
> Alexei Starovoitov writes:
> 
> > On Wed, May 08, 2019 at 03:45:12PM +0100, Jiong Wang wrote:
> >> 
> >> I might be misunderstanding your points, please just shout if I am wrong.
> >> 
> >> Suppose the following BPF code:
> >> 
> >>   unsigned helper(unsigned long long, unsigned long long);
> >>   unsigned long long test(unsigned *a, unsigned int c)
> >>   {
> >>     unsigned int b = *a;
> >>     c += 10;
> >>     return helper(b, c);
> >>   }
> >> 
> >> We get the following instruction sequence by latest llvm
> >> (-O2 -mattr=+alu32 -mcpu=v3)
> >> 
> >>   test:
> >>     1: w1 = *(u32 *)(r1 + 0)
> >>     2: w2 += 10
> >>     3: call helper
> >>     4: exit
> >> 
> >> Argument Types
> >> ===
> >> Now instruction 1 and 2 are sub-register defines, and instruction 3, the
> >> call, use them implicitly.
> >> 
> >> Without the introduction of the new ARG_CONST_SIZE32 and
> >> ARG_CONST_SIZE32_OR_ZERO, we don't know what should be done with w1 and
> >> w2, zero-extend them should be fine for all cases, but could resulting in a
> >> few unnecessary zero-extension inserted.
> >
> > I don't think we're on the same page.
> > The argument type is _const_.
> > In the example above they are not _const_.
> 
> Right, have read check_func_arg + check_helper_mem_access again.
> 
> Looks like ARG_CONST_SIZE* are designed for describing memory access size
> for things like bounds checking. It must be a constant for stack access,
> otherwise prog will be rejected, but it looks to me variables are allowed
> for pkt/map access.
> 
> But pkt/map has extra range info. So, indeed, ARG_CONST_SIZE32* are
> unnecessary, the width could be figured out through the range.
> 
> Will just drop this patch in next version.
> 
> And sorry for repeating it again, I am still concerned on the issue
> described at https://www.spinics.net/lists/netdev/msg568678.html.
> 
> To be simple, zext insertion is based on eBPF ISA and assumes all
> sub-register defines from alu32 or narrow loads need it if the underlying

It's not an assumption. It's a requirement. If a JIT is not zeroing the
upper 32 bits after a 32-bit alu or narrow load, it's a bug.

> hardware arches don't do it. However, some arches support hardware zext
> partially. For example, PowerPC, SPARC etc are 64-bit arches, while they
> don't do hardware zext on alu32, they do it for narrow loads. And RISCV is
> even more special, some alu32 has hardware zext, some don't.
> 
> At the moment we have single backend hook "bpf_jit_hardware_zext", once a
> backend enable it, verifier just insert zero extension for all identified
> alu32 and narrow loads.
> 
> Given verifier analysis info is not pushed down to JIT back-ends, verifier
> needs more back-end info pushed up from back-ends. Do you think make sense
> to introduce another hook "bpf_jit_hardware_zext_narrow_load" to at least
> prevent unnecessary zext inserted for narrowed loads for arches like
> PowerPC, SPARC?
> 
> The hooks to control verifier zext insertion then becomes two:
> 
>   bpf_jit_hardware_zext_alu32
>   bpf_jit_hardware_zext_narrow_load

and what to do with other combinations?
Like, in some cases a narrow load on a particular arch will be zero extended
by hw, and if it's misaligned or some other condition holds then it will not be?
It doesn't feel like we can enumerate all such combinations.
It feels like the 'bpf_jit_hardware_zext' back-end hook isn't quite working.
It optimizes out some zexts, but may be adding unnecessary extra zexts.

Maybe it should be a global flag from the verifier, unidirectional to JITs,
that will say "the verifier inserted MOV32 where necessary; the JIT doesn't
need to do zext manually".
And then JITs will remove the MOV32 when hw does it automatically.
Removal should be easy, since such an insn will be right after the
corresponding alu32 or narrow load.
Jiong Wang May 10, 2019, 8:30 a.m. UTC | #10
Alexei Starovoitov writes:

> On Thu, May 09, 2019 at 01:32:30PM +0100, Jiong Wang wrote:
>> 
>> Alexei Starovoitov writes:
>> 
>> > On Wed, May 08, 2019 at 03:45:12PM +0100, Jiong Wang wrote:
>> >> 
>> >> I might be misunderstanding your points, please just shout if I am wrong.
>> >> 
>> >> Suppose the following BPF code:
>> >> 
>> >>   unsigned helper(unsigned long long, unsigned long long);
>> >>   unsigned long long test(unsigned *a, unsigned int c)
>> >>   {
>> >>     unsigned int b = *a;
>> >>     c += 10;
>> >>     return helper(b, c);
>> >>   }
>> >> 
>> >> We get the following instruction sequence by latest llvm
>> >> (-O2 -mattr=+alu32 -mcpu=v3)
>> >> 
>> >>   test:
>> >>     1: w1 = *(u32 *)(r1 + 0)
>> >>     2: w2 += 10
>> >>     3: call helper
>> >>     4: exit
>> >> 
>> >> Argument Types
>> >> ===
>> >> Now instruction 1 and 2 are sub-register defines, and instruction 3, the
>> >> call, use them implicitly.
>> >> 
>> >> Without the introduction of the new ARG_CONST_SIZE32 and
>> >> ARG_CONST_SIZE32_OR_ZERO, we don't know what should be done with w1 and
>> >> w2, zero-extend them should be fine for all cases, but could resulting in a
>> >> few unnecessary zero-extension inserted.
>> >
>> > I don't think we're on the same page.
>> > The argument type is _const_.
>> > In the example above they are not _const_.
>> 
>> Right, have read check_func_arg + check_helper_mem_access again.
>> 
>> Looks like ARG_CONST_SIZE* are designed for describing memory access size
>> for things like bounds checking. It must be a constant for stack access,
>> otherwise prog will be rejected, but it looks to me variables are allowed
>> for pkt/map access.
>> 
>> But pkt/map has extra range info. So, indeed, ARG_CONST_SIZE32* are
>> unnecessary, the width could be figured out through the range.
>> 
>> Will just drop this patch in next version.
>> 
>> And sorry for repeating it again, I am still concerned on the issue
>> described at https://www.spinics.net/lists/netdev/msg568678.html.
>> 
>> To be simple, zext insertion is based on eBPF ISA and assumes all
>> sub-register defines from alu32 or narrow loads need it if the underlying
>
> It's not an assumption. It's a requirement. If JIT is not zeroing
> upper 32-bits after 32-bit alu or narrow load it's a bug.
>
>> hardware arches don't do it. However, some arches support hardware zext
>> partially. For example, PowerPC, SPARC etc are 64-bit arches, while they
>> don't do hardware zext on alu32, they do it for narrow loads. And RISCV is
>> even more special, some alu32 has hardware zext, some don't.
>> 
>> At the moment we have single backend hook "bpf_jit_hardware_zext", once a
>> backend enable it, verifier just insert zero extension for all identified
>> alu32 and narrow loads.
>> 
>> Given verifier analysis info is not pushed down to JIT back-ends, verifier
>> needs more back-end info pushed up from back-ends. Do you think make sense
>> to introduce another hook "bpf_jit_hardware_zext_narrow_load" to at least
>> prevent unnecessary zext inserted for narrowed loads for arches like
>> PowerPC, SPARC?
>> 
>> The hooks to control verifier zext insertion then becomes two:
>> 
>>   bpf_jit_hardware_zext_alu32
>>   bpf_jit_hardware_zext_narrow_load
>
> and what to do with other combinations?
> Like in some cases narrow load on particular arch will be zero extended
> by hw and if it's misaligned or some other condition then it will not be? 
> It doesn't feel that we can enumerate all such combinations.

Yes, and the narrow load above is just an example. As mentioned, behaviour on
alu32 also diverges on some arches.

> It feels 'bpf_jit_hardware_zext' backend hook isn't quite working.

It is still useful for x86_64 and aarch64 to disable the verifier insertion
pass completely. But then it should perhaps be renamed to
"bpf_jit_verifier_zext"; returning false means the verifier should disable the
insertion completely.

> It optimizes out some zext, but may be adding unnecessary extra zexts.

This is exactly my concern.

> May be it should be a global flag from the verifier unidirectional to JITs
> that will say "the verifier inserted MOV32 where necessary. JIT doesn't
> need to do zext manually".
> And then JITs will remove MOV32 when hw does it automatically.
> Removal should be easy, since such insn will be right after corresponding
> alu32 or narrow load.

OK, so you mean do a simple peephole to combine insns. The JIT looks ahead to
the next insn; if it is a mov32 with dst_reg == src_reg, then skip it. And
only do this when JITing a sub-register write eBPF insn and there is
hardware zext support.

I guess such a special mov32 won't be generated by the compiler, so it won't
be a jump destination, hence skipping it is safe.

For the zero extension insertion part of this set, I am going to make the
following changes in the next version:

  1. the verifier inserts a special "mov32" (dst_reg == src_reg) as the "zext".
     JITs could still skip the zext for other "mov32" insns, but should always
     do zext for this special "mov32".
  2. rename 'bpf_jit_hardware_zext' to 'bpf_jit_verifier_zext', which
     returns false by default to disable zext insertion.
  3. JITs that want verifier zext override bpf_jit_verifier_zext to return
     true and should skip the unnecessary mov32 as described above.

Looks good?

Regards,
Jiong
Alexei Starovoitov May 10, 2019, 8:10 p.m. UTC | #11
On Fri, May 10, 2019 at 09:30:28AM +0100, Jiong Wang wrote:
> 
> Alexei Starovoitov writes:
> 
> > On Thu, May 09, 2019 at 01:32:30PM +0100, Jiong Wang wrote:
> >> 
> >> Alexei Starovoitov writes:
> >> 
> >> > On Wed, May 08, 2019 at 03:45:12PM +0100, Jiong Wang wrote:
> >> >> 
> >> >> I might be misunderstanding your points, please just shout if I am wrong.
> >> >> 
> >> >> Suppose the following BPF code:
> >> >> 
> >> >>   unsigned helper(unsigned long long, unsigned long long);
> >> >>   unsigned long long test(unsigned *a, unsigned int c)
> >> >>   {
> >> >>     unsigned int b = *a;
> >> >>     c += 10;
> >> >>     return helper(b, c);
> >> >>   }
> >> >> 
> >> >> We get the following instruction sequence by latest llvm
> >> >> (-O2 -mattr=+alu32 -mcpu=v3)
> >> >> 
> >> >>   test:
> >> >>     1: w1 = *(u32 *)(r1 + 0)
> >> >>     2: w2 += 10
> >> >>     3: call helper
> >> >>     4: exit
> >> >> 
> >> >> Argument Types
> >> >> ===
> >> >> Now instruction 1 and 2 are sub-register defines, and instruction 3, the
> >> >> call, use them implicitly.
> >> >> 
> >> >> Without the introduction of the new ARG_CONST_SIZE32 and
> >> >> ARG_CONST_SIZE32_OR_ZERO, we don't know what should be done with w1 and
> >> >> w2, zero-extend them should be fine for all cases, but could resulting in a
> >> >> few unnecessary zero-extension inserted.
> >> >
> >> > I don't think we're on the same page.
> >> > The argument type is _const_.
> >> > In the example above they are not _const_.
> >> 
> >> Right, have read check_func_arg + check_helper_mem_access again.
> >> 
> >> Looks like ARG_CONST_SIZE* are designed for describing memory access size
> >> for things like bounds checking. It must be a constant for stack access,
> >> otherwise prog will be rejected, but it looks to me variables are allowed
> >> for pkt/map access.
> >> 
> >> But pkt/map has extra range info. So, indeed, ARG_CONST_SIZE32* are
> >> unnecessary, the width could be figured out through the range.
> >> 
> >> Will just drop this patch in next version.
> >> 
> >> And sorry for repeating it again, I am still concerned on the issue
> >> described at https://www.spinics.net/lists/netdev/msg568678.html.
> >> 
> >> To be simple, zext insertion is based on eBPF ISA and assumes all
> >> sub-register defines from alu32 or narrow loads need it if the underlying
> >
> > It's not an assumption. It's a requirement. If JIT is not zeroing
> > upper 32-bits after 32-bit alu or narrow load it's a bug.
> >
> >> hardware arches don't do it. However, some arches support hardware zext
> >> partially. For example, PowerPC, SPARC etc are 64-bit arches, while they
> >> don't do hardware zext on alu32, they do it for narrow loads. And RISCV is
> >> even more special, some alu32 has hardware zext, some don't.
> >> 
> >> At the moment we have single backend hook "bpf_jit_hardware_zext", once a
> >> backend enable it, verifier just insert zero extension for all identified
> >> alu32 and narrow loads.
> >> 
> >> Given verifier analysis info is not pushed down to JIT back-ends, verifier
> >> needs more back-end info pushed up from back-ends. Do you think make sense
> >> to introduce another hook "bpf_jit_hardware_zext_narrow_load" to at least
> >> prevent unnecessary zext inserted for narrowed loads for arches like
> >> PowerPC, SPARC?
> >> 
> >> The hooks to control verifier zext insertion then becomes two:
> >> 
> >>   bpf_jit_hardware_zext_alu32
> >>   bpf_jit_hardware_zext_narrow_load
> >
> > and what to do with other combinations?
> > Like in some cases narrow load on particular arch will be zero extended
> > by hw and if it's misaligned or some other condition then it will not be? 
> > It doesn't feel that we can enumerate all such combinations.
> 
> Yes, and above narrow_load is just an example. As mentioned, behaviour on
> alu32 also diverse on some arches.
> 
> > It feels 'bpf_jit_hardware_zext' backend hook isn't quite working.
> 
> It is still useful for x86_64 and aarch64 to disable verifier insertion
> pass completely. But then perhaps should be renamed into
> "bpf_jit_verifier_zext". Returning false means verifier should disable the
> insertion completely.

I think the name is too cryptic.
Maybe call it bpf_jit_needs_zext?
x64/arm64 will set it to false and the rest to true?
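
A minimal sketch of what such a hook could look like (the __weak default and
the per-JIT override are assumptions about how it might end up being wired):

  /* kernel/bpf/core.c: weak default, verifier inserts no zext */
  bool __weak bpf_jit_needs_zext(void)
  {
  	return false;
  }

  /* a 32-bit or partially zero-extending JIT overrides it */
  bool bpf_jit_needs_zext(void)
  {
  	return true;
  }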

> > It optimizes out some zext, but may be adding unnecessary extra zexts.
> 
> This is exactly my concern.
> 
> > May be it should be a global flag from the verifier unidirectional to JITs
> > that will say "the verifier inserted MOV32 where necessary. JIT doesn't
> > need to do zext manually".
> > And then JITs will remove MOV32 when hw does it automatically.
> > Removal should be easy, since such insn will be right after corresponding
> > alu32 or narrow load.
> 
> OK, so you mean do a simple peephole to combine insns. JIT looks forward
> the next insn, if it is mov32 with dst_src == src_reg, then skip it. And
> only do this when jitting a sub-register write eBPF insn and there is
> hardware zext support.
> 
> I guess such special mov32 won't be generated by compiler that it won't be
> jump destination hence skip it is safe.
> 
> For zero extension insertion part of this set, I am going to do the
> following changes in next version:
> 
>   1. verifier inserts special "mov32" (dst_reg == src_reg) as "zext".
>      JIT could still save zext for the other "mov32", but should always do
>      zext for this special "mov32".

Maybe use a mov32 with imm==1 as an indicator that such a mov32 is special?
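
A rough sketch of that marker (assuming the existing BPF_MOV32_REG() macro
from include/linux/filter.h and treating insn->imm purely as a flag):

  /* verifier side: emit mov32 rX, rX with imm=1 after the sub-register write */
  struct bpf_insn zext = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg);
  zext.imm = 1;

  /* JIT side: recognize the marker when deciding whether it can be skipped */
  static bool is_verifier_inserted_zext(const struct bpf_insn *insn)
  {
  	return insn->code == (BPF_ALU | BPF_MOV | BPF_X) &&
  	       insn->dst_reg == insn->src_reg && insn->imm == 1;
  }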

>   2. rename 'bpf_jit_hardware_zext' to 'bpf_jit_verifier_zext' which
>      returns false at default to disable zext insertion.
>   3. JITs want verifier zext override bpf_jit_verifier_zext to return
>      true and should skip unnecessary mov32 as described above.
> 
> Looks good?

Kinda makes sense, but when x64/arm64 are saying 'don't do zext',
what is the verifier supposed to do? It will still do the analysis
and liveness marks, but just the mov32 won't be inserted?
I guess that's fine, since BPF_F_TEST_RND_HI32 will use
the results of the analysis?
Please double check that BPF_F_TEST_RND_HI32 poisoning works on
32-bit archs too.
Jiong Wang May 10, 2019, 9:59 p.m. UTC | #12
Alexei Starovoitov writes:

> On Fri, May 10, 2019 at 09:30:28AM +0100, Jiong Wang wrote:
>> 
>> Alexei Starovoitov writes:
>> 
>> > On Thu, May 09, 2019 at 01:32:30PM +0100, Jiong Wang wrote:
>> >> 
>> >> Alexei Starovoitov writes:
>> >> 
>> >> > On Wed, May 08, 2019 at 03:45:12PM +0100, Jiong Wang wrote:
>> >> >> 
>> >> >> I might be misunderstanding your points, please just shout if I am wrong.
>> >> >> 
>> >> >> Suppose the following BPF code:
>> >> >> 
>> >> >>   unsigned helper(unsigned long long, unsigned long long);
>> >> >>   unsigned long long test(unsigned *a, unsigned int c)
>> >> >>   {
>> >> >>     unsigned int b = *a;
>> >> >>     c += 10;
>> >> >>     return helper(b, c);
>> >> >>   }
>> >> >> 
>> >> >> We get the following instruction sequence by latest llvm
>> >> >> (-O2 -mattr=+alu32 -mcpu=v3)
>> >> >> 
>> >> >>   test:
>> >> >>     1: w1 = *(u32 *)(r1 + 0)
>> >> >>     2: w2 += 10
>> >> >>     3: call helper
>> >> >>     4: exit
>> >> >> 
>> >> >> Argument Types
>> >> >> ===
>> >> >> Now instruction 1 and 2 are sub-register defines, and instruction 3, the
>> >> >> call, use them implicitly.
>> >> >> 
>> >> >> Without the introduction of the new ARG_CONST_SIZE32 and
>> >> >> ARG_CONST_SIZE32_OR_ZERO, we don't know what should be done with w1 and
>> >> >> w2, zero-extend them should be fine for all cases, but could resulting in a
>> >> >> few unnecessary zero-extension inserted.
>> >> >
>> >> > I don't think we're on the same page.
>> >> > The argument type is _const_.
>> >> > In the example above they are not _const_.
>> >> 
>> >> Right, have read check_func_arg + check_helper_mem_access again.
>> >> 
>> >> Looks like ARG_CONST_SIZE* are designed for describing memory access size
>> >> for things like bounds checking. It must be a constant for stack access,
>> >> otherwise prog will be rejected, but it looks to me variables are allowed
>> >> for pkt/map access.
>> >> 
>> >> But pkt/map has extra range info. So, indeed, ARG_CONST_SIZE32* are
>> >> unnecessary, the width could be figured out through the range.
>> >> 
>> >> Will just drop this patch in next version.
>> >> 
>> >> And sorry for repeating it again, I am still concerned on the issue
>> >> described at https://www.spinics.net/lists/netdev/msg568678.html.
>> >> 
>> >> To be simple, zext insertion is based on eBPF ISA and assumes all
>> >> sub-register defines from alu32 or narrow loads need it if the underlying
>> >
>> > It's not an assumption. It's a requirement. If JIT is not zeroing
>> > upper 32-bits after 32-bit alu or narrow load it's a bug.
>> >
>> >> hardware arches don't do it. However, some arches support hardware zext
>> >> partially. For example, PowerPC, SPARC etc are 64-bit arches, while they
>> >> don't do hardware zext on alu32, they do it for narrow loads. And RISCV is
>> >> even more special, some alu32 has hardware zext, some don't.
>> >> 
>> >> At the moment we have single backend hook "bpf_jit_hardware_zext", once a
>> >> backend enable it, verifier just insert zero extension for all identified
>> >> alu32 and narrow loads.
>> >> 
>> >> Given verifier analysis info is not pushed down to JIT back-ends, verifier
>> >> needs more back-end info pushed up from back-ends. Do you think make sense
>> >> to introduce another hook "bpf_jit_hardware_zext_narrow_load" to at least
>> >> prevent unnecessary zext inserted for narrowed loads for arches like
>> >> PowerPC, SPARC?
>> >> 
>> >> The hooks to control verifier zext insertion then becomes two:
>> >> 
>> >>   bpf_jit_hardware_zext_alu32
>> >>   bpf_jit_hardware_zext_narrow_load
>> >
>> > and what to do with other combinations?
>> > Like in some cases narrow load on particular arch will be zero extended
>> > by hw and if it's misaligned or some other condition then it will not be? 
>> > It doesn't feel that we can enumerate all such combinations.
>> 
>> Yes, and above narrow_load is just an example. As mentioned, behaviour on
>> alu32 also diverse on some arches.
>> 
>> > It feels 'bpf_jit_hardware_zext' backend hook isn't quite working.
>> 
>> It is still useful for x86_64 and aarch64 to disable verifier insertion
>> pass completely. But then perhaps should be renamed into
>> "bpf_jit_verifier_zext". Returning false means verifier should disable the
>> insertion completely.
>
> I think the name is too cryptic.
> May be call it bpf_jit_needs_zext ?

Ack.

> x64/arm64 will set it false and the rest to true ?

Ack.

>> > It optimizes out some zext, but may be adding unnecessary extra zexts.
>> 
>> This is exactly my concern.
>> 
>> > May be it should be a global flag from the verifier unidirectional to JITs
>> > that will say "the verifier inserted MOV32 where necessary. JIT doesn't
>> > need to do zext manually".
>> > And then JITs will remove MOV32 when hw does it automatically.
>> > Removal should be easy, since such insn will be right after corresponding
>> > alu32 or narrow load.
>> 
>> OK, so you mean do a simple peephole to combine insns. JIT looks forward
>> the next insn, if it is mov32 with dst_src == src_reg, then skip it. And
>> only do this when jitting a sub-register write eBPF insn and there is
>> hardware zext support.
>> 
>> I guess such special mov32 won't be generated by compiler that it won't be
>> jump destination hence skip it is safe.
>> 
>> For zero extension insertion part of this set, I am going to do the
>> following changes in next version:
>> 
>>   1. verifier inserts special "mov32" (dst_reg == src_reg) as "zext".
>>      JIT could still save zext for the other "mov32", but should always do
>>      zext for this special "mov32".
>
> May be used mov32 with imm==1 as indicator that such mov32 is special?

OK, will go with it.

>
>>   2. rename 'bpf_jit_hardware_zext' to 'bpf_jit_verifier_zext' which
>>      returns false at default to disable zext insertion.
>>   3. JITs want verifier zext override bpf_jit_verifier_zext to return
>>      true and should skip unnecessary mov32 as described above.
>> 
>> Looks good?
>
> Kinda makes sense, but when x64/arm64 are saying 'dont do zext'
> what verifier suppose to do? It will still do the analysis
> and liveness marks, but only mov32 won't be inserted?

Yes. The analysis part is still enabled; it is quite integrated with the
existing liveness tracking infrastructure and is not heavy.

The zext insertion part is disabled.

> I guess that's fine, since BPF_F_TEST_RND_HI32 will use
> the results of the analysis?

Yes, hi32 poisoning needs it.

> Please double check that BPF_F_TEST_RND_HI32 poisoning works on
> 32-bit archs too.

OK. I don't have a 32-bit host env at the moment, but I will try to sort this
out.

Regards,
Jiong

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9a21848..11a5fb9 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -198,9 +198,12 @@  enum bpf_arg_type {
 
 	ARG_CONST_SIZE,		/* number of bytes accessed from memory */
 	ARG_CONST_SIZE_OR_ZERO,	/* number of bytes accessed from memory or 0 */
+	ARG_CONST_SIZE32,	/* Likewise, but size fits into 32-bit */
+	ARG_CONST_SIZE32_OR_ZERO,	/* Ditto */
 
 	ARG_PTR_TO_CTX,		/* pointer to context */
 	ARG_ANYTHING,		/* any (initialized) argument is ok */
+	ARG_ANYTHING32,		/* Likewise, but it is a 32-bit argument */
 	ARG_PTR_TO_SPIN_LOCK,	/* pointer to bpf_spin_lock */
 	ARG_PTR_TO_SOCK_COMMON,	/* pointer to sock_common */
 	ARG_PTR_TO_INT,		/* pointer to int */
@@ -210,7 +213,8 @@  enum bpf_arg_type {
 
 /* type of values returned from helper functions */
 enum bpf_return_type {
-	RET_INTEGER,			/* function returns integer */
+	RET_INTEGER,			/* function returns 32-bit integer */
+	RET_INTEGER64,			/* function returns 64-bit integer */
 	RET_VOID,			/* function doesn't return anything */
 	RET_PTR_TO_MAP_VALUE,		/* returns a pointer to map elem value */
 	RET_PTR_TO_MAP_VALUE_OR_NULL,	/* returns a pointer to map elem value or NULL */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ace8c22..2792eda 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2067,7 +2067,7 @@  const struct bpf_func_proto bpf_tail_call_proto = {
 	.ret_type	= RET_VOID,
 	.arg1_type	= ARG_PTR_TO_CTX,
 	.arg2_type	= ARG_CONST_MAP_PTR,
-	.arg3_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING32,
 };
 
 /* Stub for JITs that only support cBPF. eBPF programs are interpreted.
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 4266ffd..60f6e31 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -157,7 +157,7 @@  BPF_CALL_0(bpf_ktime_get_ns)
 const struct bpf_func_proto bpf_ktime_get_ns_proto = {
 	.func		= bpf_ktime_get_ns,
 	.gpl_only	= true,
-	.ret_type	= RET_INTEGER,
+	.ret_type	= RET_INTEGER64,
 };
 
 BPF_CALL_0(bpf_get_current_pid_tgid)
@@ -173,7 +173,7 @@  BPF_CALL_0(bpf_get_current_pid_tgid)
 const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
 	.func		= bpf_get_current_pid_tgid,
 	.gpl_only	= false,
-	.ret_type	= RET_INTEGER,
+	.ret_type	= RET_INTEGER64,
 };
 
 BPF_CALL_0(bpf_get_current_uid_gid)
@@ -193,7 +193,7 @@  BPF_CALL_0(bpf_get_current_uid_gid)
 const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
 	.func		= bpf_get_current_uid_gid,
 	.gpl_only	= false,
-	.ret_type	= RET_INTEGER,
+	.ret_type	= RET_INTEGER64,
 };
 
 BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
@@ -221,7 +221,7 @@  const struct bpf_func_proto bpf_get_current_comm_proto = {
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
-	.arg2_type	= ARG_CONST_SIZE,
+	.arg2_type	= ARG_CONST_SIZE32,
 };
 
 #if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)
@@ -331,7 +331,7 @@  BPF_CALL_0(bpf_get_current_cgroup_id)
 const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
 	.func		= bpf_get_current_cgroup_id,
 	.gpl_only	= false,
-	.ret_type	= RET_INTEGER,
+	.ret_type	= RET_INTEGER64,
 };
 
 #ifdef CONFIG_CGROUP_BPF
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2717172..07ab563 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2492,7 +2492,9 @@  static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
 static bool arg_type_is_mem_size(enum bpf_arg_type type)
 {
 	return type == ARG_CONST_SIZE ||
-	       type == ARG_CONST_SIZE_OR_ZERO;
+	       type == ARG_CONST_SIZE_OR_ZERO ||
+	       type == ARG_CONST_SIZE32 ||
+	       type == ARG_CONST_SIZE32_OR_ZERO;
 }
 
 static bool arg_type_is_int_ptr(enum bpf_arg_type type)
@@ -2526,7 +2528,7 @@  static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 	if (err)
 		return err;
 
-	if (arg_type == ARG_ANYTHING) {
+	if (arg_type == ARG_ANYTHING || arg_type == ARG_ANYTHING32) {
 		if (is_pointer_value(env, regno)) {
 			verbose(env, "R%d leaks addr into helper function\n",
 				regno);
@@ -2554,7 +2556,9 @@  static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			 type != expected_type)
 			goto err_type;
 	} else if (arg_type == ARG_CONST_SIZE ||
-		   arg_type == ARG_CONST_SIZE_OR_ZERO) {
+		   arg_type == ARG_CONST_SIZE_OR_ZERO ||
+		   arg_type == ARG_CONST_SIZE32 ||
+		   arg_type == ARG_CONST_SIZE32_OR_ZERO) {
 		expected_type = SCALAR_VALUE;
 		if (type != expected_type)
 			goto err_type;
@@ -2660,7 +2664,8 @@  static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 					      meta->map_ptr->value_size, false,
 					      meta);
 	} else if (arg_type_is_mem_size(arg_type)) {
-		bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
+		bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO ||
+					  arg_type == ARG_CONST_SIZE32_OR_ZERO);
 
 		/* remember the mem_size which may be used later
 		 * to refine return values.
@@ -3333,7 +3338,7 @@  static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 	}
 
 	/* update return register (already marked as written above) */
-	if (fn->ret_type == RET_INTEGER) {
+	if (fn->ret_type == RET_INTEGER || fn->ret_type == RET_INTEGER64) {
 		/* sets type to SCALAR_VALUE */
 		mark_reg_unknown(env, regs, BPF_REG_0);
 	} else if (fn->ret_type == RET_VOID) {
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 8607aba..f300b68 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -370,7 +370,7 @@  BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
 static const struct bpf_func_proto bpf_perf_event_read_proto = {
 	.func		= bpf_perf_event_read,
 	.gpl_only	= true,
-	.ret_type	= RET_INTEGER,
+	.ret_type	= RET_INTEGER64,
 	.arg1_type	= ARG_CONST_MAP_PTR,
 	.arg2_type	= ARG_ANYTHING,
 };
@@ -503,7 +503,7 @@  BPF_CALL_0(bpf_get_current_task)
 static const struct bpf_func_proto bpf_get_current_task_proto = {
 	.func		= bpf_get_current_task,
 	.gpl_only	= true,
-	.ret_type	= RET_INTEGER,
+	.ret_type	= RET_INTEGER64,
 };
 
 BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
diff --git a/net/core/filter.c b/net/core/filter.c
index 55bfc94..72f2abe 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1695,9 +1695,9 @@  static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type	= ARG_ANYTHING,
+	.arg2_type	= ARG_ANYTHING32,
 	.arg3_type	= ARG_PTR_TO_MEM,
-	.arg4_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_CONST_SIZE32,
 	.arg5_type	= ARG_ANYTHING,
 };
 
@@ -1760,9 +1760,9 @@  static const struct bpf_func_proto bpf_flow_dissector_load_bytes_proto = {
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type	= ARG_ANYTHING,
+	.arg2_type	= ARG_ANYTHING32,
 	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
-	.arg4_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_CONST_SIZE32,
 };
 
 BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
@@ -1911,7 +1911,7 @@  static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type	= ARG_ANYTHING,
+	.arg2_type	= ARG_ANYTHING32,
 	.arg3_type	= ARG_ANYTHING,
 	.arg4_type	= ARG_ANYTHING,
 	.arg5_type	= ARG_ANYTHING,
@@ -1964,7 +1964,7 @@  static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type	= ARG_ANYTHING,
+	.arg2_type	= ARG_ANYTHING32,
 	.arg3_type	= ARG_ANYTHING,
 	.arg4_type	= ARG_ANYTHING,
 	.arg5_type	= ARG_ANYTHING,
@@ -2003,9 +2003,9 @@  static const struct bpf_func_proto bpf_csum_diff_proto = {
 	.pkt_access	= true,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_MEM_OR_NULL,
-	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg2_type	= ARG_CONST_SIZE32_OR_ZERO,
 	.arg3_type	= ARG_PTR_TO_MEM_OR_NULL,
-	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg4_type	= ARG_CONST_SIZE32_OR_ZERO,
 	.arg5_type	= ARG_ANYTHING,
 };
 
@@ -2186,7 +2186,7 @@  static const struct bpf_func_proto bpf_redirect_proto = {
 	.func           = bpf_redirect,
 	.gpl_only       = false,
 	.ret_type       = RET_INTEGER,
-	.arg1_type      = ARG_ANYTHING,
+	.arg1_type      = ARG_ANYTHING32,
 	.arg2_type      = ARG_ANYTHING,
 };
 
@@ -2964,7 +2964,7 @@  static const struct bpf_func_proto bpf_skb_change_proto_proto = {
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type	= ARG_ANYTHING,
+	.arg2_type	= ARG_ANYTHING32,
 	.arg3_type	= ARG_ANYTHING,
 };
 
@@ -2984,7 +2984,7 @@  static const struct bpf_func_proto bpf_skb_change_type_proto = {
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type	= ARG_ANYTHING,
+	.arg2_type	= ARG_ANYTHING32,
 };
 
 static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
@@ -3287,7 +3287,7 @@  static const struct bpf_func_proto bpf_skb_change_tail_proto = {
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type	= ARG_ANYTHING,
+	.arg2_type	= ARG_ANYTHING32,
 	.arg3_type	= ARG_ANYTHING,
 };
 
@@ -3883,7 +3883,7 @@  static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_CTX,
 	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
-	.arg3_type	= ARG_CONST_SIZE,
+	.arg3_type	= ARG_CONST_SIZE32,
 	.arg4_type	= ARG_ANYTHING,
 };
 
@@ -3992,7 +3992,7 @@  static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_CTX,
 	.arg2_type	= ARG_PTR_TO_MEM,
-	.arg3_type	= ARG_CONST_SIZE,
+	.arg3_type	= ARG_CONST_SIZE32,
 	.arg4_type	= ARG_ANYTHING,
 };
 
@@ -4091,7 +4091,7 @@  BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb)
 static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
 	.func           = bpf_skb_cgroup_id,
 	.gpl_only       = false,
-	.ret_type       = RET_INTEGER,
+	.ret_type       = RET_INTEGER64,
 	.arg1_type      = ARG_PTR_TO_CTX,
 };
 
@@ -4116,7 +4116,7 @@  BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
 static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
 	.func           = bpf_skb_ancestor_cgroup_id,
 	.gpl_only       = false,
-	.ret_type       = RET_INTEGER,
+	.ret_type       = RET_INTEGER64,
 	.arg1_type      = ARG_PTR_TO_CTX,
 	.arg2_type      = ARG_ANYTHING,
 };
@@ -4162,7 +4162,7 @@  BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
 static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
 	.func           = bpf_get_socket_cookie,
 	.gpl_only       = false,
-	.ret_type       = RET_INTEGER,
+	.ret_type       = RET_INTEGER64,
 	.arg1_type      = ARG_PTR_TO_CTX,
 };
 
@@ -4174,7 +4174,7 @@  BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
 static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = {
 	.func		= bpf_get_socket_cookie_sock_addr,
 	.gpl_only	= false,
-	.ret_type	= RET_INTEGER,
+	.ret_type	= RET_INTEGER64,
 	.arg1_type	= ARG_PTR_TO_CTX,
 };
 
@@ -4186,7 +4186,7 @@  BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
 static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = {
 	.func		= bpf_get_socket_cookie_sock_ops,
 	.gpl_only	= false,
-	.ret_type	= RET_INTEGER,
+	.ret_type	= RET_INTEGER64,
 	.arg1_type	= ARG_PTR_TO_CTX,
 };