diff mbox series

[PATCH/RFC,v2,bpf-next,05/19] bpf: split read liveness into REG_LIVE_READ64 and REG_LIVE_READ32

Message ID 1554925833-7333-6-git-send-email-jiong.wang@netronome.com
State RFC
Delegated to: BPF Maintainers
Headers show
Series bpf: eliminate zero extensions for sub-register writes | expand

Commit Message

Jiong Wang April 10, 2019, 7:50 p.m. UTC
Register liveness infrastructure doesn't track register read width at the
moment, while the width information will be needed for the later 32-bit
safety analysis pass.

This patch take the first step to split read liveness into REG_LIVE_READ64
and REG_LIVE_READ32.

Liveness propagation code are updated accordingly. They are taught to
understand how to propagate REG_LIVE_READ64 and REG_LIVE_READ32 at the same
propagation iteration. For example, "mark_reg_read" now propagate "flags"
which could be multiple read bits instead of the single REG_LIVE_READ64.

A write still screen off all width of reads.

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
---
 include/linux/bpf_verifier.h |   8 +--
 kernel/bpf/verifier.c        | 119 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 113 insertions(+), 14 deletions(-)

Comments

Jakub Kicinski April 11, 2019, 2:52 a.m. UTC | #1
On Wed, 10 Apr 2019 20:50:19 +0100, Jiong Wang wrote:
> Register liveness infrastructure doesn't track register read width at the
> moment, while the width information will be needed for the later 32-bit
> safety analysis pass.
> 
> This patch take the first step to split read liveness into REG_LIVE_READ64
> and REG_LIVE_READ32.
> 
> Liveness propagation code are updated accordingly. They are taught to
> understand how to propagate REG_LIVE_READ64 and REG_LIVE_READ32 at the same
> propagation iteration. For example, "mark_reg_read" now propagate "flags"
> which could be multiple read bits instead of the single REG_LIVE_READ64.
> 
> A write still screen off all width of reads.
> 
> Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
> ---
>  include/linux/bpf_verifier.h |   8 +--
>  kernel/bpf/verifier.c        | 119 +++++++++++++++++++++++++++++++++++++++----
>  2 files changed, 113 insertions(+), 14 deletions(-)
> 
> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> index b3ab61f..fba0ebb 100644
> --- a/include/linux/bpf_verifier.h
> +++ b/include/linux/bpf_verifier.h
> @@ -36,9 +36,11 @@
>   */
>  enum bpf_reg_liveness {
>  	REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */
> -	REG_LIVE_READ, /* reg was read, so we're sensitive to initial value */
> -	REG_LIVE_WRITTEN, /* reg was written first, screening off later reads */
> -	REG_LIVE_DONE = 4, /* liveness won't be updating this register anymore */
> +	REG_LIVE_READ32 = 0x1, /* reg was read, so we're sensitive to initial value */
> +	REG_LIVE_READ64 = 0x2, /* likewise, but full 64-bit content matters */
> +	REG_LIVE_READ = REG_LIVE_READ32 | REG_LIVE_READ64,
> +	REG_LIVE_WRITTEN = 0x4, /* reg was written first, screening off later reads */
> +	REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */
>  };
>  
>  struct bpf_reg_state {
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index bd30a65..44e4278 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -1135,7 +1135,7 @@ static int check_subprogs(struct bpf_verifier_env *env)
>   */
>  static int mark_reg_read(struct bpf_verifier_env *env,
>  			 const struct bpf_reg_state *state,
> -			 struct bpf_reg_state *parent)
> +			 struct bpf_reg_state *parent, u8 flags)
>  {
>  	bool writes = parent == state->parent; /* Observe write marks */
>  	int cnt = 0;
> @@ -1150,17 +1150,17 @@ static int mark_reg_read(struct bpf_verifier_env *env,
>  				parent->var_off.value, parent->off);
>  			return -EFAULT;
>  		}
> -		if (parent->live & REG_LIVE_READ)
> +		if ((parent->live & REG_LIVE_READ) == flags)
>  			/* The parentage chain never changes and
> -			 * this parent was already marked as LIVE_READ.
> +			 * this parent was already marked with all read bits.

Do we have to propagate all read bits?  Read64 is strictly stronger
than read32, as long as read64 is set on the parent we should be good?

>  			 * There is no need to keep walking the chain again and
> -			 * keep re-marking all parents as LIVE_READ.
> +			 * keep re-marking all parents with reads bits in flags.
>  			 * This case happens when the same register is read
>  			 * multiple times without writes into it in-between.
>  			 */
>  			break;
>  		/* ... then we depend on parent's value */
> -		parent->live |= REG_LIVE_READ;
> +		parent->live |= flags;
>  		state = parent;
>  		parent = state->parent;
>  		writes = true;

> @@ -6196,12 +6286,19 @@ static int propagate_liveness_reg(struct bpf_verifier_env *env,
>  				  struct bpf_reg_state *reg,
>  				  struct bpf_reg_state *parent_reg)
>  {
> +	u8 parent_bits = parent_reg->live & REG_LIVE_READ;
> +	u8 bits = reg->live & REG_LIVE_READ;
> +	u8 bits_diff = parent_bits ^ bits;
> +	u8 bits_prop = bits_diff & bits;
>  	int err;
>  
> -	if (parent_reg->live & REG_LIVE_READ || !(reg->live & REG_LIVE_READ))
> +	/* "reg" and "parent_reg" has the same read bits, or the bit doesn't
> +	 * belong to "reg".
> +	 */
> +	if (!bits_diff || !bits_prop)

bits_prop is a subset of bits_diff, no?  !bits_prop is always true
if !bits_diff is true, no need to check both.

>  		return 0;
> 
> -	err = mark_reg_read(env, reg, parent_reg);
> +	err = mark_reg_read(env, reg, parent_reg, bits_prop);
>  	if (err)
>  		return err;
>
Jiong Wang April 11, 2019, 6:13 a.m. UTC | #2
> On 11 Apr 2019, at 03:52, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
> 
> On Wed, 10 Apr 2019 20:50:19 +0100, Jiong Wang wrote:
>> Register liveness infrastructure doesn't track register read width at the
>> moment, while the width information will be needed for the later 32-bit
>> safety analysis pass.
>> 
>> This patch take the first step to split read liveness into REG_LIVE_READ64
>> and REG_LIVE_READ32.
>> 
>> Liveness propagation code are updated accordingly. They are taught to
>> understand how to propagate REG_LIVE_READ64 and REG_LIVE_READ32 at the same
>> propagation iteration. For example, "mark_reg_read" now propagate "flags"
>> which could be multiple read bits instead of the single REG_LIVE_READ64.
>> 
>> A write still screen off all width of reads.
>> 
>> Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
>> ---
>> include/linux/bpf_verifier.h |   8 +--
>> kernel/bpf/verifier.c        | 119 +++++++++++++++++++++++++++++++++++++++----
>> 2 files changed, 113 insertions(+), 14 deletions(-)
>> 
>> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
>> index b3ab61f..fba0ebb 100644
>> --- a/include/linux/bpf_verifier.h
>> +++ b/include/linux/bpf_verifier.h
>> @@ -36,9 +36,11 @@
>>  */
>> enum bpf_reg_liveness {
>> 	REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */
>> -	REG_LIVE_READ, /* reg was read, so we're sensitive to initial value */
>> -	REG_LIVE_WRITTEN, /* reg was written first, screening off later reads */
>> -	REG_LIVE_DONE = 4, /* liveness won't be updating this register anymore */
>> +	REG_LIVE_READ32 = 0x1, /* reg was read, so we're sensitive to initial value */
>> +	REG_LIVE_READ64 = 0x2, /* likewise, but full 64-bit content matters */
>> +	REG_LIVE_READ = REG_LIVE_READ32 | REG_LIVE_READ64,
>> +	REG_LIVE_WRITTEN = 0x4, /* reg was written first, screening off later reads */
>> +	REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */
>> };
>> 
>> struct bpf_reg_state {
>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>> index bd30a65..44e4278 100644
>> --- a/kernel/bpf/verifier.c
>> +++ b/kernel/bpf/verifier.c
>> @@ -1135,7 +1135,7 @@ static int check_subprogs(struct bpf_verifier_env *env)
>>  */
>> static int mark_reg_read(struct bpf_verifier_env *env,
>> 			 const struct bpf_reg_state *state,
>> -			 struct bpf_reg_state *parent)
>> +			 struct bpf_reg_state *parent, u8 flags)
>> {
>> 	bool writes = parent == state->parent; /* Observe write marks */
>> 	int cnt = 0;
>> @@ -1150,17 +1150,17 @@ static int mark_reg_read(struct bpf_verifier_env *env,
>> 				parent->var_off.value, parent->off);
>> 			return -EFAULT;
>> 		}
>> -		if (parent->live & REG_LIVE_READ)
>> +		if ((parent->live & REG_LIVE_READ) == flags)
>> 			/* The parentage chain never changes and
>> -			 * this parent was already marked as LIVE_READ.
>> +			 * this parent was already marked with all read bits.
> 
> Do we have to propagate all read bits?  Read64 is strictly stronger
> than read32, as long as read64 is set on the parent we should be good?

We should be good, but I doubt there is value to differentiate on this in this
kind of HOT function.
 
> 
>> 			 * There is no need to keep walking the chain again and
>> -			 * keep re-marking all parents as LIVE_READ.
>> +			 * keep re-marking all parents with reads bits in flags.
>> 			 * This case happens when the same register is read
>> 			 * multiple times without writes into it in-between.
>> 			 */
>> 			break;
>> 		/* ... then we depend on parent's value */
>> -		parent->live |= REG_LIVE_READ;
>> +		parent->live |= flags;
>> 		state = parent;
>> 		parent = state->parent;
>> 		writes = true;
> 
>> @@ -6196,12 +6286,19 @@ static int propagate_liveness_reg(struct bpf_verifier_env *env,
>> 				  struct bpf_reg_state *reg,
>> 				  struct bpf_reg_state *parent_reg)
>> {
>> +	u8 parent_bits = parent_reg->live & REG_LIVE_READ;
>> +	u8 bits = reg->live & REG_LIVE_READ;
>> +	u8 bits_diff = parent_bits ^ bits;
>> +	u8 bits_prop = bits_diff & bits;
>> 	int err;
>> 
>> -	if (parent_reg->live & REG_LIVE_READ || !(reg->live & REG_LIVE_READ))
>> +	/* "reg" and "parent_reg" has the same read bits, or the bit doesn't
>> +	 * belong to "reg".
>> +	 */
>> +	if (!bits_diff || !bits_prop)
> 
> bits_prop is a subset of bits_diff, no?  !bits_prop is always true
> if !bits_diff is true, no need to check both.

Bits_prop is a subset of bits_diff WHEN it comes from “reg", we don’t want to
do the propagation when the diff comes from “parent_reg”, so, we need to check
both.

Regards,
Jiong
 
> 
>> 		return 0;
>> 
>> -	err = mark_reg_read(env, reg, parent_reg);
>> +	err = mark_reg_read(env, reg, parent_reg, bits_prop);
>> 	if (err)
>> 		return err;
Jakub Kicinski April 11, 2019, 4:44 p.m. UTC | #3
On Thu, 11 Apr 2019 07:13:03 +0100, Jiong Wang wrote:
> >> @@ -1150,17 +1150,17 @@ static int mark_reg_read(struct bpf_verifier_env *env,
> >> 				parent->var_off.value, parent->off);
> >> 			return -EFAULT;
> >> 		}
> >> -		if (parent->live & REG_LIVE_READ)
> >> +		if ((parent->live & REG_LIVE_READ) == flags)
> >> 			/* The parentage chain never changes and
> >> -			 * this parent was already marked as LIVE_READ.
> >> +			 * this parent was already marked with all read bits.  
> > 
> > Do we have to propagate all read bits?  Read64 is strictly stronger
> > than read32, as long as read64 is set on the parent we should be good?  
> 
> We should be good, but I doubt there is value to differentiate on this in this
> kind of HOT function.

The entire if clause is an optimization.  I'm saying you can maintain it
as more aggressive.

> >> @@ -6196,12 +6286,19 @@ static int propagate_liveness_reg(struct bpf_verifier_env *env,
> >> 				  struct bpf_reg_state *reg,
> >> 				  struct bpf_reg_state *parent_reg)
> >> {
> >> +	u8 parent_bits = parent_reg->live & REG_LIVE_READ;
> >> +	u8 bits = reg->live & REG_LIVE_READ;
> >> +	u8 bits_diff = parent_bits ^ bits;
> >> +	u8 bits_prop = bits_diff & bits;
> >> 	int err;
> >> 
> >> -	if (parent_reg->live & REG_LIVE_READ || !(reg->live & REG_LIVE_READ))
> >> +	/* "reg" and "parent_reg" has the same read bits, or the bit doesn't
> >> +	 * belong to "reg".
> >> +	 */
> >> +	if (!bits_diff || !bits_prop)  
> > 
> > bits_prop is a subset of bits_diff, no?  !bits_prop is always true
> > if !bits_diff is true, no need to check both.  
> 
> Bits_prop is a subset of bits_diff WHEN it comes from “reg", we don’t want to
> do the propagation when the diff comes from “parent_reg”, so, we need to check
> both.

Not sure what you're saying, in this patch:

	u8 bits_prop = bits_diff & bits;

Maybe you're talking about some patch down the line..
Jiong Wang April 11, 2019, 4:53 p.m. UTC | #4
> On 11 Apr 2019, at 17:44, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
> 
> On Thu, 11 Apr 2019 07:13:03 +0100, Jiong Wang wrote:
>>>> @@ -1150,17 +1150,17 @@ static int mark_reg_read(struct bpf_verifier_env *env,
>>>> 				parent->var_off.value, parent->off);
>>>> 			return -EFAULT;
>>>> 		}
>>>> -		if (parent->live & REG_LIVE_READ)
>>>> +		if ((parent->live & REG_LIVE_READ) == flags)
>>>> 			/* The parentage chain never changes and
>>>> -			 * this parent was already marked as LIVE_READ.
>>>> +			 * this parent was already marked with all read bits.  
>>> 
>>> Do we have to propagate all read bits?  Read64 is strictly stronger
>>> than read32, as long as read64 is set on the parent we should be good?  
>> 
>> We should be good, but I doubt there is value to differentiate on this in this
>> kind of HOT function.
> 
> The entire if clause is an optimization.  I'm saying you can maintain it
> as more aggressive.

What I mean is I suspect the read width could be quite consistent in real program,
the percentage for doing extra check on read64 could actually be mishit for
most of the time, if the propagation iterations is big the extra check done each
time may overcome the propagation pruned.

I will do some benchmarking on this to see the real gain.

Regards,
Jiong


> 
>>>> @@ -6196,12 +6286,19 @@ static int propagate_liveness_reg(struct bpf_verifier_env *env,
>>>> 				  struct bpf_reg_state *reg,
>>>> 				  struct bpf_reg_state *parent_reg)
>>>> {
>>>> +	u8 parent_bits = parent_reg->live & REG_LIVE_READ;
>>>> +	u8 bits = reg->live & REG_LIVE_READ;
>>>> +	u8 bits_diff = parent_bits ^ bits;
>>>> +	u8 bits_prop = bits_diff & bits;
>>>> 	int err;
>>>> 
>>>> -	if (parent_reg->live & REG_LIVE_READ || !(reg->live & REG_LIVE_READ))
>>>> +	/* "reg" and "parent_reg" has the same read bits, or the bit doesn't
>>>> +	 * belong to "reg".
>>>> +	 */
>>>> +	if (!bits_diff || !bits_prop)  
>>> 
>>> bits_prop is a subset of bits_diff, no?  !bits_prop is always true
>>> if !bits_diff is true, no need to check both.  
>> 
>> Bits_prop is a subset of bits_diff WHEN it comes from “reg", we don’t want to
>> do the propagation when the diff comes from “parent_reg”, so, we need to check
>> both.
> 
> Not sure what you're saying, in this patch:
> 
> 	u8 bits_prop = bits_diff & bits;
> 
> Maybe you're talking about some patch down the line..
Jiong Wang April 11, 2019, 5:22 p.m. UTC | #5
> On 11 Apr 2019, at 17:44, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
> 
> On Thu, 11 Apr 2019 07:13:03 +0100, Jiong Wang wrote:
>>>> @@ -1150,17 +1150,17 @@ static int mark_reg_read(struct bpf_verifier_env *env,
>>>> 				parent->var_off.value, parent->off);
>>>> 			return -EFAULT;
>>>> 		}
>>>> -		if (parent->live & REG_LIVE_READ)
>>>> +		if ((parent->live & REG_LIVE_READ) == flags)
>>>> 			/* The parentage chain never changes and
>>>> -			 * this parent was already marked as LIVE_READ.
>>>> +			 * this parent was already marked with all read bits.  
>>> 
>>> Do we have to propagate all read bits?  Read64 is strictly stronger
>>> than read32, as long as read64 is set on the parent we should be good?  
>> 
>> We should be good, but I doubt there is value to differentiate on this in this
>> kind of HOT function.
> 
> The entire if clause is an optimization.  I'm saying you can maintain it
> as more aggressive.
> 
>>>> @@ -6196,12 +6286,19 @@ static int propagate_liveness_reg(struct bpf_verifier_env *env,
>>>> 				  struct bpf_reg_state *reg,
>>>> 				  struct bpf_reg_state *parent_reg)
>>>> {
>>>> +	u8 parent_bits = parent_reg->live & REG_LIVE_READ;
>>>> +	u8 bits = reg->live & REG_LIVE_READ;
>>>> +	u8 bits_diff = parent_bits ^ bits;
>>>> +	u8 bits_prop = bits_diff & bits;
>>>> 	int err;
>>>> 
>>>> -	if (parent_reg->live & REG_LIVE_READ || !(reg->live & REG_LIVE_READ))
>>>> +	/* "reg" and "parent_reg" has the same read bits, or the bit doesn't
>>>> +	 * belong to "reg".
>>>> +	 */
>>>> +	if (!bits_diff || !bits_prop)  
>>> 
>>> bits_prop is a subset of bits_diff, no?  !bits_prop is always true
>>> if !bits_diff is true, no need to check both.  
>> 
>> Bits_prop is a subset of bits_diff WHEN it comes from “reg", we don’t want to
>> do the propagation when the diff comes from “parent_reg”, so, we need to check
>> both.
> 
> Not sure what you're saying, in this patch:
> 
> 	u8 bits_prop = bits_diff & bits;
> 
> Maybe you're talking about some patch down the line..

Ack, indeed, !bits_prop is always true if !bits_diff is true, will remove the
redundant check.

Thanks,
Regards,
Jiong
Jiong Wang April 12, 2019, 4:14 p.m. UTC | #6
On Thu, Apr 11, 2019 at 5:53 PM Jiong Wang <jiong.wang@netronome.com> wrote:
>
>
> > On 11 Apr 2019, at 17:44, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
> >
> > On Thu, 11 Apr 2019 07:13:03 +0100, Jiong Wang wrote:
> >>>> @@ -1150,17 +1150,17 @@ static int mark_reg_read(struct bpf_verifier_env *env,
> >>>>                            parent->var_off.value, parent->off);
> >>>>                    return -EFAULT;
> >>>>            }
> >>>> -          if (parent->live & REG_LIVE_READ)
> >>>> +          if ((parent->live & REG_LIVE_READ) == flags)
> >>>>                    /* The parentage chain never changes and
> >>>> -                   * this parent was already marked as LIVE_READ.
> >>>> +                   * this parent was already marked with all read bits.
> >>>
> >>> Do we have to propagate all read bits?  Read64 is strictly stronger
> >>> than read32, as long as read64 is set on the parent we should be good?
> >>
> >> We should be good, but I doubt there is value to differentiate on this in this
> >> kind of HOT function.
> >
> > The entire if clause is an optimization.  I'm saying you can maintain it
> > as more aggressive.
>
> What I mean is I suspect the read width could be quite consistent in real program,
> the percentage for doing extra check on read64 could actually be mishit for
> most of the time, if the propagation iterations is big the extra check done each
> time may overcome the propagation pruned.
>
> I will do some benchmarking on this to see the real gain.

Take bpf_lxc.o for example, it has ~3 million mark_reg_read
propagation iteration.

Adding extra (parent->live & REG_LIVE_READ64) check cuts off 18% ~ 25%
propagation iterations in exchange of 75% iterations are doing one more check
(parent->live & REG_LIVE_READ64).

Given the propagation contains several statement, I think we are better off
adding such check (also done some ktime_get_real_ts64 run timing measure, but
the results is not very consistent).

Will add it in v3.

Regards,
Jiong
diff mbox series

Patch

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index b3ab61f..fba0ebb 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -36,9 +36,11 @@ 
  */
 enum bpf_reg_liveness {
 	REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */
-	REG_LIVE_READ, /* reg was read, so we're sensitive to initial value */
-	REG_LIVE_WRITTEN, /* reg was written first, screening off later reads */
-	REG_LIVE_DONE = 4, /* liveness won't be updating this register anymore */
+	REG_LIVE_READ32 = 0x1, /* reg was read, so we're sensitive to initial value */
+	REG_LIVE_READ64 = 0x2, /* likewise, but full 64-bit content matters */
+	REG_LIVE_READ = REG_LIVE_READ32 | REG_LIVE_READ64,
+	REG_LIVE_WRITTEN = 0x4, /* reg was written first, screening off later reads */
+	REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */
 };
 
 struct bpf_reg_state {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index bd30a65..44e4278 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1135,7 +1135,7 @@  static int check_subprogs(struct bpf_verifier_env *env)
  */
 static int mark_reg_read(struct bpf_verifier_env *env,
 			 const struct bpf_reg_state *state,
-			 struct bpf_reg_state *parent)
+			 struct bpf_reg_state *parent, u8 flags)
 {
 	bool writes = parent == state->parent; /* Observe write marks */
 	int cnt = 0;
@@ -1150,17 +1150,17 @@  static int mark_reg_read(struct bpf_verifier_env *env,
 				parent->var_off.value, parent->off);
 			return -EFAULT;
 		}
-		if (parent->live & REG_LIVE_READ)
+		if ((parent->live & REG_LIVE_READ) == flags)
 			/* The parentage chain never changes and
-			 * this parent was already marked as LIVE_READ.
+			 * this parent was already marked with all read bits.
 			 * There is no need to keep walking the chain again and
-			 * keep re-marking all parents as LIVE_READ.
+			 * keep re-marking all parents with reads bits in flags.
 			 * This case happens when the same register is read
 			 * multiple times without writes into it in-between.
 			 */
 			break;
 		/* ... then we depend on parent's value */
-		parent->live |= REG_LIVE_READ;
+		parent->live |= flags;
 		state = parent;
 		parent = state->parent;
 		writes = true;
@@ -1172,12 +1172,95 @@  static int mark_reg_read(struct bpf_verifier_env *env,
 	return 0;
 }
 
+/* This function is supposed to be used by the following 32-bit optimization
+ * code only. It returns TRUE if the source or destination register operates
+ * on 64-bit, otherwise return FALSE.
+ */
+static bool is_reg64(struct bpf_insn *insn, u32 regno,
+		     struct bpf_reg_state *reg, enum reg_arg_type t)
+{
+	u8 code, class, op;
+
+	code = insn->code;
+	class = BPF_CLASS(code);
+	op = BPF_OP(code);
+	if (class == BPF_JMP) {
+		/* BPF_EXIT will reach here because of return value readability
+		 * test for "main" which has s32 return value.
+		 */
+		if (op == BPF_EXIT)
+			return false;
+		if (op == BPF_CALL) {
+			/* BPF to BPF call will reach here because of marking
+			 * caller saved clobber with DST_OP_NO_MARK for which we
+			 * don't care the register def because they are anyway
+			 * marked as NOT_INIT already.
+			 */
+			if (insn->src_reg == BPF_PSEUDO_CALL)
+				return false;
+			/* Helper call will reach here because of arg type
+			 * check. Conservatively marking all args as 64-bit.
+			 */
+			return true;
+		}
+	}
+
+	if (class == BPF_ALU64 || class == BPF_JMP ||
+	    /* BPF_END always use BPF_ALU class. */
+	    (class == BPF_ALU && op == BPF_END && insn->imm == 64))
+		return true;
+
+	if (class == BPF_ALU || class == BPF_JMP32)
+		return false;
+
+	if (class == BPF_LDX) {
+		if (t != SRC_OP)
+			return BPF_SIZE(code) == BPF_DW;
+		/* LDX source must be ptr. */
+		return true;
+	}
+
+	if (class == BPF_STX) {
+		if (reg->type != SCALAR_VALUE)
+			return true;
+		return BPF_SIZE(code) == BPF_DW;
+	}
+
+	if (class == BPF_LD) {
+		u8 mode = BPF_MODE(code);
+
+		/* LD_IMM64 */
+		if (mode == BPF_IMM)
+			return true;
+
+		/* Both LD_IND and LD_ABS return 32-bit data. */
+		if (t != SRC_OP)
+			return  false;
+
+		/* Implicit ctx ptr. */
+		if (regno == BPF_REG_6)
+			return true;
+
+		/* Explicit source could be any width. */
+		return true;
+	}
+
+	if (class == BPF_ST)
+		/* The only source register for BPF_ST is a ptr. */
+		return true;
+
+	/* Conservatively return true at default. */
+	return true;
+}
+
 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
 			 enum reg_arg_type t)
 {
 	struct bpf_verifier_state *vstate = env->cur_state;
 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
+	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
 	struct bpf_reg_state *reg, *regs = state->regs;
+	bool rw64;
 
 	if (regno >= MAX_BPF_REG) {
 		verbose(env, "R%d is invalid\n", regno);
@@ -1185,6 +1268,7 @@  static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
 	}
 
 	reg = &regs[regno];
+	rw64 = is_reg64(insn, regno, reg, t);
 	if (t == SRC_OP) {
 		/* check whether register used as source operand can be read */
 		if (reg->type == NOT_INIT) {
@@ -1195,7 +1279,8 @@  static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
 		if (regno == BPF_REG_FP)
 			return 0;
 
-		return mark_reg_read(env, reg, reg->parent);
+		return mark_reg_read(env, reg, reg->parent,
+				     rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
 	} else {
 		/* check whether register used as dest operand can be written to */
 		if (regno == BPF_REG_FP) {
@@ -1382,7 +1467,8 @@  static int check_stack_read(struct bpf_verifier_env *env,
 			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
 		}
 		mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
-			      reg_state->stack[spi].spilled_ptr.parent);
+			      reg_state->stack[spi].spilled_ptr.parent,
+			      REG_LIVE_READ64);
 		return 0;
 	} else {
 		int zeros = 0;
@@ -1399,7 +1485,9 @@  static int check_stack_read(struct bpf_verifier_env *env,
 			return -EACCES;
 		}
 		mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
-			      reg_state->stack[spi].spilled_ptr.parent);
+			      reg_state->stack[spi].spilled_ptr.parent,
+			      size == BPF_REG_SIZE
+			      ? REG_LIVE_READ64 : REG_LIVE_READ32);
 		if (value_regno >= 0) {
 			if (zeros == size) {
 				/* any size read into register is zero extended,
@@ -2336,7 +2424,9 @@  static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
 		 * the whole slot to be marked as 'read'
 		 */
 		mark_reg_read(env, &state->stack[spi].spilled_ptr,
-			      state->stack[spi].spilled_ptr.parent);
+			      state->stack[spi].spilled_ptr.parent,
+			      access_size == BPF_REG_SIZE
+			      ? REG_LIVE_READ64 : REG_LIVE_READ32);
 	}
 	return update_stack_depth(env, state, min_off);
 }
@@ -6196,12 +6286,19 @@  static int propagate_liveness_reg(struct bpf_verifier_env *env,
 				  struct bpf_reg_state *reg,
 				  struct bpf_reg_state *parent_reg)
 {
+	u8 parent_bits = parent_reg->live & REG_LIVE_READ;
+	u8 bits = reg->live & REG_LIVE_READ;
+	u8 bits_diff = parent_bits ^ bits;
+	u8 bits_prop = bits_diff & bits;
 	int err;
 
-	if (parent_reg->live & REG_LIVE_READ || !(reg->live & REG_LIVE_READ))
+	/* "reg" and "parent_reg" has the same read bits, or the bit doesn't
+	 * belong to "reg".
+	 */
+	if (!bits_diff || !bits_prop)
 		return 0;
 
-	err = mark_reg_read(env, reg, parent_reg);
+	err = mark_reg_read(env, reg, parent_reg, bits_prop);
 	if (err)
 		return err;