diff mbox series

[4/9] powerpc/bpf: Handle large branch ranges with BPF_EXIT

Message ID ebc0317ce465cb4f8d6fe485ab468ac5bda7c48f.1633104510.git.naveen.n.rao@linux.vnet.ibm.com (mailing list archive)
State Superseded
Headers show
Series powerpc/bpf: Various fixes | expand

Commit Message

Naveen N. Rao Oct. 1, 2021, 9:14 p.m. UTC
In some scenarios, it is possible that the program epilogue is outside
the branch range for a BPF_EXIT instruction. Instead of rejecting such
programs, emit an indirect branch. We track the size of the bpf program
emitted after the initial run and do a second pass since BPF_EXIT can
end up emitting different number of instructions depending on the
program size.

Suggested-by: Jordan Niethe <jniethe5@gmail.com>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
---
 arch/powerpc/net/bpf_jit.h        |  3 +++
 arch/powerpc/net/bpf_jit_comp.c   | 22 +++++++++++++++++++++-
 arch/powerpc/net/bpf_jit_comp32.c |  2 +-
 arch/powerpc/net/bpf_jit_comp64.c |  2 +-
 4 files changed, 26 insertions(+), 3 deletions(-)

Comments

Song Liu Oct. 1, 2021, 9:53 p.m. UTC | #1
On Fri, Oct 1, 2021 at 2:17 PM Naveen N. Rao
<naveen.n.rao@linux.vnet.ibm.com> wrote:
>
> In some scenarios, it is possible that the program epilogue is outside
> the branch range for a BPF_EXIT instruction. Instead of rejecting such
> programs, emit an indirect branch. We track the size of the bpf program
> emitted after the initial run and do a second pass since BPF_EXIT can
> end up emitting different number of instructions depending on the
> program size.
>
> Suggested-by: Jordan Niethe <jniethe5@gmail.com>
> Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>

Acked-by: Song Liu <songliubraving@fb.com>

> ---
>  arch/powerpc/net/bpf_jit.h        |  3 +++
>  arch/powerpc/net/bpf_jit_comp.c   | 22 +++++++++++++++++++++-
>  arch/powerpc/net/bpf_jit_comp32.c |  2 +-
>  arch/powerpc/net/bpf_jit_comp64.c |  2 +-
>  4 files changed, 26 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
> index 89bd744c2bffd4..4023de1698b9f5 100644
> --- a/arch/powerpc/net/bpf_jit.h
> +++ b/arch/powerpc/net/bpf_jit.h
> @@ -126,6 +126,7 @@
>
>  #define SEEN_FUNC      0x20000000 /* might call external helpers */
>  #define SEEN_TAILCALL  0x40000000 /* uses tail calls */
> +#define SEEN_BIG_PROG  0x80000000 /* large prog, >32MB */
>
>  #define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */
>  #define SEEN_NVREG_MASK        0x0003ffff /* Non volatile registers r14-r31 */
> @@ -179,6 +180,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>  void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
>  void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
>  void bpf_jit_realloc_regs(struct codegen_context *ctx);
> +int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx,
> +                                       int tmp_reg, unsigned long exit_addr);
>
>  #endif
>
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index fcbf7a917c566e..3204872fbf2738 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -72,6 +72,21 @@ static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
>         return 0;
>  }
>
> +int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx,
> +                                       int tmp_reg, unsigned long exit_addr)
> +{
> +       if (!(ctx->seen & SEEN_BIG_PROG) && is_offset_in_branch_range(exit_addr)) {
> +               PPC_JMP(exit_addr);
> +       } else {
> +               ctx->seen |= SEEN_BIG_PROG;
> +               PPC_FUNC_ADDR(tmp_reg, (unsigned long)image + exit_addr);
> +               EMIT(PPC_RAW_MTCTR(tmp_reg));
> +               EMIT(PPC_RAW_BCTR());
> +       }
> +
> +       return 0;
> +}
> +
>  struct powerpc64_jit_data {
>         struct bpf_binary_header *header;
>         u32 *addrs;
> @@ -155,12 +170,17 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>                 goto out_addrs;
>         }
>
> +       if (!is_offset_in_branch_range((long)cgctx.idx * 4))
> +               cgctx.seen |= SEEN_BIG_PROG;
> +
>         /*
>          * If we have seen a tail call, we need a second pass.
>          * This is because bpf_jit_emit_common_epilogue() is called
>          * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
> +        * We also need a second pass if we ended up with too large
> +        * a program so as to fix branches.
>          */
> -       if (cgctx.seen & SEEN_TAILCALL) {
> +       if (cgctx.seen & (SEEN_TAILCALL | SEEN_BIG_PROG)) {
>                 cgctx.idx = 0;
>                 if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
>                         fp = org_fp;
> diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
> index a74d52204f8da2..d2a67574a23066 100644
> --- a/arch/powerpc/net/bpf_jit_comp32.c
> +++ b/arch/powerpc/net/bpf_jit_comp32.c
> @@ -852,7 +852,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>                          * we'll just fall through to the epilogue.
>                          */
>                         if (i != flen - 1)
> -                               PPC_JMP(exit_addr);
> +                               bpf_jit_emit_exit_insn(image, ctx, tmp_reg, exit_addr);
>                         /* else fall through to the epilogue */
>                         break;
>
> diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
> index f06c62089b1457..3351a866ef6207 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c
> @@ -761,7 +761,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>                          * we'll just fall through to the epilogue.
>                          */
>                         if (i != flen - 1)
> -                               PPC_JMP(exit_addr);
> +                               bpf_jit_emit_exit_insn(image, ctx, b2p[TMP_REG_1], exit_addr);
>                         /* else fall through to the epilogue */
>                         break;
>
> --
> 2.33.0
>
Johan Almbladh Oct. 2, 2021, 5:31 p.m. UTC | #2
On Fri, Oct 1, 2021 at 11:15 PM Naveen N. Rao
<naveen.n.rao@linux.vnet.ibm.com> wrote:
>
> In some scenarios, it is possible that the program epilogue is outside
> the branch range for a BPF_EXIT instruction. Instead of rejecting such
> programs, emit an indirect branch. We track the size of the bpf program
> emitted after the initial run and do a second pass since BPF_EXIT can
> end up emitting different number of instructions depending on the
> program size.
>
> Suggested-by: Jordan Niethe <jniethe5@gmail.com>
> Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>

Acked-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Tested-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>

> ---
>  arch/powerpc/net/bpf_jit.h        |  3 +++
>  arch/powerpc/net/bpf_jit_comp.c   | 22 +++++++++++++++++++++-
>  arch/powerpc/net/bpf_jit_comp32.c |  2 +-
>  arch/powerpc/net/bpf_jit_comp64.c |  2 +-
>  4 files changed, 26 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
> index 89bd744c2bffd4..4023de1698b9f5 100644
> --- a/arch/powerpc/net/bpf_jit.h
> +++ b/arch/powerpc/net/bpf_jit.h
> @@ -126,6 +126,7 @@
>
>  #define SEEN_FUNC      0x20000000 /* might call external helpers */
>  #define SEEN_TAILCALL  0x40000000 /* uses tail calls */
> +#define SEEN_BIG_PROG  0x80000000 /* large prog, >32MB */
>
>  #define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */
>  #define SEEN_NVREG_MASK        0x0003ffff /* Non volatile registers r14-r31 */
> @@ -179,6 +180,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>  void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
>  void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
>  void bpf_jit_realloc_regs(struct codegen_context *ctx);
> +int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx,
> +                                       int tmp_reg, unsigned long exit_addr);
>
>  #endif
>
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index fcbf7a917c566e..3204872fbf2738 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -72,6 +72,21 @@ static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
>         return 0;
>  }
>
> +int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx,
> +                                       int tmp_reg, unsigned long exit_addr)
> +{
> +       if (!(ctx->seen & SEEN_BIG_PROG) && is_offset_in_branch_range(exit_addr)) {
> +               PPC_JMP(exit_addr);
> +       } else {
> +               ctx->seen |= SEEN_BIG_PROG;
> +               PPC_FUNC_ADDR(tmp_reg, (unsigned long)image + exit_addr);
> +               EMIT(PPC_RAW_MTCTR(tmp_reg));
> +               EMIT(PPC_RAW_BCTR());
> +       }
> +
> +       return 0;
> +}
> +
>  struct powerpc64_jit_data {
>         struct bpf_binary_header *header;
>         u32 *addrs;
> @@ -155,12 +170,17 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>                 goto out_addrs;
>         }
>
> +       if (!is_offset_in_branch_range((long)cgctx.idx * 4))
> +               cgctx.seen |= SEEN_BIG_PROG;
> +
>         /*
>          * If we have seen a tail call, we need a second pass.
>          * This is because bpf_jit_emit_common_epilogue() is called
>          * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
> +        * We also need a second pass if we ended up with too large
> +        * a program so as to fix branches.
>          */
> -       if (cgctx.seen & SEEN_TAILCALL) {
> +       if (cgctx.seen & (SEEN_TAILCALL | SEEN_BIG_PROG)) {
>                 cgctx.idx = 0;
>                 if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
>                         fp = org_fp;
> diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
> index a74d52204f8da2..d2a67574a23066 100644
> --- a/arch/powerpc/net/bpf_jit_comp32.c
> +++ b/arch/powerpc/net/bpf_jit_comp32.c
> @@ -852,7 +852,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>                          * we'll just fall through to the epilogue.
>                          */
>                         if (i != flen - 1)
> -                               PPC_JMP(exit_addr);
> +                               bpf_jit_emit_exit_insn(image, ctx, tmp_reg, exit_addr);
>                         /* else fall through to the epilogue */
>                         break;
>
> diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
> index f06c62089b1457..3351a866ef6207 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c
> @@ -761,7 +761,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>                          * we'll just fall through to the epilogue.
>                          */
>                         if (i != flen - 1)
> -                               PPC_JMP(exit_addr);
> +                               bpf_jit_emit_exit_insn(image, ctx, b2p[TMP_REG_1], exit_addr);
>                         /* else fall through to the epilogue */
>                         break;
>
> --
> 2.33.0
>
Christophe Leroy Oct. 3, 2021, 7:59 a.m. UTC | #3
Le 01/10/2021 à 23:14, Naveen N. Rao a écrit :
> In some scenarios, it is possible that the program epilogue is outside
> the branch range for a BPF_EXIT instruction. Instead of rejecting such
> programs, emit an indirect branch. We track the size of the bpf program
> emitted after the initial run and do a second pass since BPF_EXIT can
> end up emitting different number of instructions depending on the
> program size.
> 
> Suggested-by: Jordan Niethe <jniethe5@gmail.com>
> Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
> ---
>   arch/powerpc/net/bpf_jit.h        |  3 +++
>   arch/powerpc/net/bpf_jit_comp.c   | 22 +++++++++++++++++++++-
>   arch/powerpc/net/bpf_jit_comp32.c |  2 +-
>   arch/powerpc/net/bpf_jit_comp64.c |  2 +-
>   4 files changed, 26 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
> index 89bd744c2bffd4..4023de1698b9f5 100644
> --- a/arch/powerpc/net/bpf_jit.h
> +++ b/arch/powerpc/net/bpf_jit.h
> @@ -126,6 +126,7 @@
>   
>   #define SEEN_FUNC	0x20000000 /* might call external helpers */
>   #define SEEN_TAILCALL	0x40000000 /* uses tail calls */
> +#define SEEN_BIG_PROG	0x80000000 /* large prog, >32MB */
>   
>   #define SEEN_VREG_MASK	0x1ff80000 /* Volatile registers r3-r12 */
>   #define SEEN_NVREG_MASK	0x0003ffff /* Non volatile registers r14-r31 */
> @@ -179,6 +180,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
>   void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
>   void bpf_jit_realloc_regs(struct codegen_context *ctx);
> +int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx,
> +					int tmp_reg, unsigned long exit_addr);
>   
>   #endif
>   
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index fcbf7a917c566e..3204872fbf2738 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -72,6 +72,21 @@ static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
>   	return 0;
>   }
>   
> +int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx,
> +					int tmp_reg, unsigned long exit_addr)
> +{
> +	if (!(ctx->seen & SEEN_BIG_PROG) && is_offset_in_branch_range(exit_addr)) {
> +		PPC_JMP(exit_addr);
> +	} else {
> +		ctx->seen |= SEEN_BIG_PROG;
> +		PPC_FUNC_ADDR(tmp_reg, (unsigned long)image + exit_addr);
> +		EMIT(PPC_RAW_MTCTR(tmp_reg));
> +		EMIT(PPC_RAW_BCTR());
> +	}
> +
> +	return 0;
> +}
> +
>   struct powerpc64_jit_data {
>   	struct bpf_binary_header *header;
>   	u32 *addrs;
> @@ -155,12 +170,17 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>   		goto out_addrs;
>   	}
>   
> +	if (!is_offset_in_branch_range((long)cgctx.idx * 4))
> +		cgctx.seen |= SEEN_BIG_PROG;
> +
>   	/*
>   	 * If we have seen a tail call, we need a second pass.
>   	 * This is because bpf_jit_emit_common_epilogue() is called
>   	 * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
> +	 * We also need a second pass if we ended up with too large
> +	 * a program so as to fix branches.
>   	 */
> -	if (cgctx.seen & SEEN_TAILCALL) {
> +	if (cgctx.seen & (SEEN_TAILCALL | SEEN_BIG_PROG)) {
>   		cgctx.idx = 0;
>   		if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
>   			fp = org_fp;
> diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
> index a74d52204f8da2..d2a67574a23066 100644
> --- a/arch/powerpc/net/bpf_jit_comp32.c
> +++ b/arch/powerpc/net/bpf_jit_comp32.c
> @@ -852,7 +852,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   			 * we'll just fall through to the epilogue.
>   			 */
>   			if (i != flen - 1)
> -				PPC_JMP(exit_addr);
> +				bpf_jit_emit_exit_insn(image, ctx, tmp_reg, exit_addr);

On ppc32, if you use tmp_reg you must flag it. But I think you could use 
r0 instead.

>   			/* else fall through to the epilogue */
>   			break;
>   
> diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
> index f06c62089b1457..3351a866ef6207 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c
> @@ -761,7 +761,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   			 * we'll just fall through to the epilogue.
>   			 */
>   			if (i != flen - 1)
> -				PPC_JMP(exit_addr);
> +				bpf_jit_emit_exit_insn(image, ctx, b2p[TMP_REG_1], exit_addr);
>   			/* else fall through to the epilogue */
>   			break;
>   
>
Naveen N. Rao Oct. 4, 2021, 6:24 p.m. UTC | #4
Christophe Leroy wrote:
> 
> 
> Le 01/10/2021 à 23:14, Naveen N. Rao a écrit :
>> In some scenarios, it is possible that the program epilogue is outside
>> the branch range for a BPF_EXIT instruction. Instead of rejecting such
>> programs, emit an indirect branch. We track the size of the bpf program
>> emitted after the initial run and do a second pass since BPF_EXIT can
>> end up emitting different number of instructions depending on the
>> program size.
>> 
>> Suggested-by: Jordan Niethe <jniethe5@gmail.com>
>> Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
>> ---
>>   arch/powerpc/net/bpf_jit.h        |  3 +++
>>   arch/powerpc/net/bpf_jit_comp.c   | 22 +++++++++++++++++++++-
>>   arch/powerpc/net/bpf_jit_comp32.c |  2 +-
>>   arch/powerpc/net/bpf_jit_comp64.c |  2 +-
>>   4 files changed, 26 insertions(+), 3 deletions(-)
>> 
>> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
>> index 89bd744c2bffd4..4023de1698b9f5 100644
>> --- a/arch/powerpc/net/bpf_jit.h
>> +++ b/arch/powerpc/net/bpf_jit.h
>> @@ -126,6 +126,7 @@
>>   
>>   #define SEEN_FUNC	0x20000000 /* might call external helpers */
>>   #define SEEN_TAILCALL	0x40000000 /* uses tail calls */
>> +#define SEEN_BIG_PROG	0x80000000 /* large prog, >32MB */
>>   
>>   #define SEEN_VREG_MASK	0x1ff80000 /* Volatile registers r3-r12 */
>>   #define SEEN_NVREG_MASK	0x0003ffff /* Non volatile registers r14-r31 */
>> @@ -179,6 +180,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>>   void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
>>   void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
>>   void bpf_jit_realloc_regs(struct codegen_context *ctx);
>> +int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx,
>> +					int tmp_reg, unsigned long exit_addr);
>>   
>>   #endif
>>   
>> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
>> index fcbf7a917c566e..3204872fbf2738 100644
>> --- a/arch/powerpc/net/bpf_jit_comp.c
>> +++ b/arch/powerpc/net/bpf_jit_comp.c
>> @@ -72,6 +72,21 @@ static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
>>   	return 0;
>>   }
>>   
>> +int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx,
>> +					int tmp_reg, unsigned long exit_addr)
>> +{
>> +	if (!(ctx->seen & SEEN_BIG_PROG) && is_offset_in_branch_range(exit_addr)) {
>> +		PPC_JMP(exit_addr);
>> +	} else {
>> +		ctx->seen |= SEEN_BIG_PROG;
>> +		PPC_FUNC_ADDR(tmp_reg, (unsigned long)image + exit_addr);
>> +		EMIT(PPC_RAW_MTCTR(tmp_reg));
>> +		EMIT(PPC_RAW_BCTR());
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>>   struct powerpc64_jit_data {
>>   	struct bpf_binary_header *header;
>>   	u32 *addrs;
>> @@ -155,12 +170,17 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>   		goto out_addrs;
>>   	}
>>   
>> +	if (!is_offset_in_branch_range((long)cgctx.idx * 4))
>> +		cgctx.seen |= SEEN_BIG_PROG;
>> +
>>   	/*
>>   	 * If we have seen a tail call, we need a second pass.
>>   	 * This is because bpf_jit_emit_common_epilogue() is called
>>   	 * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
>> +	 * We also need a second pass if we ended up with too large
>> +	 * a program so as to fix branches.
>>   	 */
>> -	if (cgctx.seen & SEEN_TAILCALL) {
>> +	if (cgctx.seen & (SEEN_TAILCALL | SEEN_BIG_PROG)) {
>>   		cgctx.idx = 0;
>>   		if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
>>   			fp = org_fp;
>> diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
>> index a74d52204f8da2..d2a67574a23066 100644
>> --- a/arch/powerpc/net/bpf_jit_comp32.c
>> +++ b/arch/powerpc/net/bpf_jit_comp32.c
>> @@ -852,7 +852,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>>   			 * we'll just fall through to the epilogue.
>>   			 */
>>   			if (i != flen - 1)
>> -				PPC_JMP(exit_addr);
>> +				bpf_jit_emit_exit_insn(image, ctx, tmp_reg, exit_addr);
> 
> On ppc32, if you use tmp_reg you must flag it. But I think you could use 
> r0 instead.

Indeed. Can we drop tracking of the temp registers and using them while
remapping registers? Are you seeing significant benefits with re-use of 
those temp registers?

- Naveen
Christophe Leroy Oct. 5, 2021, 5:46 a.m. UTC | #5
Le 04/10/2021 à 20:24, Naveen N. Rao a écrit :
> Christophe Leroy wrote:
>>
>>
>> Le 01/10/2021 à 23:14, Naveen N. Rao a écrit :
>>> In some scenarios, it is possible that the program epilogue is outside
>>> the branch range for a BPF_EXIT instruction. Instead of rejecting such
>>> programs, emit an indirect branch. We track the size of the bpf program
>>> emitted after the initial run and do a second pass since BPF_EXIT can
>>> end up emitting different number of instructions depending on the
>>> program size.
>>>
>>> Suggested-by: Jordan Niethe <jniethe5@gmail.com>
>>> Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
>>> ---
>>>   arch/powerpc/net/bpf_jit.h        |  3 +++
>>>   arch/powerpc/net/bpf_jit_comp.c   | 22 +++++++++++++++++++++-
>>>   arch/powerpc/net/bpf_jit_comp32.c |  2 +-
>>>   arch/powerpc/net/bpf_jit_comp64.c |  2 +-
>>>   4 files changed, 26 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
>>> index 89bd744c2bffd4..4023de1698b9f5 100644
>>> --- a/arch/powerpc/net/bpf_jit.h
>>> +++ b/arch/powerpc/net/bpf_jit.h
>>> @@ -126,6 +126,7 @@
>>>   #define SEEN_FUNC    0x20000000 /* might call external helpers */
>>>   #define SEEN_TAILCALL    0x40000000 /* uses tail calls */
>>> +#define SEEN_BIG_PROG    0x80000000 /* large prog, >32MB */
>>>   #define SEEN_VREG_MASK    0x1ff80000 /* Volatile registers r3-r12 */
>>>   #define SEEN_NVREG_MASK    0x0003ffff /* Non volatile registers 
>>> r14-r31 */
>>> @@ -179,6 +180,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 
>>> *image, struct codegen_context *
>>>   void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
>>>   void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
>>>   void bpf_jit_realloc_regs(struct codegen_context *ctx);
>>> +int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx,
>>> +                    int tmp_reg, unsigned long exit_addr);
>>>   #endif
>>> diff --git a/arch/powerpc/net/bpf_jit_comp.c 
>>> b/arch/powerpc/net/bpf_jit_comp.c
>>> index fcbf7a917c566e..3204872fbf2738 100644
>>> --- a/arch/powerpc/net/bpf_jit_comp.c
>>> +++ b/arch/powerpc/net/bpf_jit_comp.c
>>> @@ -72,6 +72,21 @@ static int bpf_jit_fixup_subprog_calls(struct 
>>> bpf_prog *fp, u32 *image,
>>>       return 0;
>>>   }
>>> +int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx,
>>> +                    int tmp_reg, unsigned long exit_addr)
>>> +{
>>> +    if (!(ctx->seen & SEEN_BIG_PROG) && 
>>> is_offset_in_branch_range(exit_addr)) {
>>> +        PPC_JMP(exit_addr);
>>> +    } else {
>>> +        ctx->seen |= SEEN_BIG_PROG;
>>> +        PPC_FUNC_ADDR(tmp_reg, (unsigned long)image + exit_addr);
>>> +        EMIT(PPC_RAW_MTCTR(tmp_reg));
>>> +        EMIT(PPC_RAW_BCTR());
>>> +    }
>>> +
>>> +    return 0;
>>> +}
>>> +
>>>   struct powerpc64_jit_data {
>>>       struct bpf_binary_header *header;
>>>       u32 *addrs;
>>> @@ -155,12 +170,17 @@ struct bpf_prog *bpf_int_jit_compile(struct 
>>> bpf_prog *fp)
>>>           goto out_addrs;
>>>       }
>>> +    if (!is_offset_in_branch_range((long)cgctx.idx * 4))
>>> +        cgctx.seen |= SEEN_BIG_PROG;
>>> +
>>>       /*
>>>        * If we have seen a tail call, we need a second pass.
>>>        * This is because bpf_jit_emit_common_epilogue() is called
>>>        * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
>>> +     * We also need a second pass if we ended up with too large
>>> +     * a program so as to fix branches.
>>>        */
>>> -    if (cgctx.seen & SEEN_TAILCALL) {
>>> +    if (cgctx.seen & (SEEN_TAILCALL | SEEN_BIG_PROG)) {
>>>           cgctx.idx = 0;
>>>           if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
>>>               fp = org_fp;
>>> diff --git a/arch/powerpc/net/bpf_jit_comp32.c 
>>> b/arch/powerpc/net/bpf_jit_comp32.c
>>> index a74d52204f8da2..d2a67574a23066 100644
>>> --- a/arch/powerpc/net/bpf_jit_comp32.c
>>> +++ b/arch/powerpc/net/bpf_jit_comp32.c
>>> @@ -852,7 +852,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 
>>> *image, struct codegen_context *
>>>                * we'll just fall through to the epilogue.
>>>                */
>>>               if (i != flen - 1)
>>> -                PPC_JMP(exit_addr);
>>> +                bpf_jit_emit_exit_insn(image, ctx, tmp_reg, exit_addr);
>>
>> On ppc32, if you use tmp_reg you must flag it. But I think you could 
>> use r0 instead.
> 
> Indeed. Can we drop tracking of the temp registers and using them while
> remapping registers? Are you seeing significant benefits with re-use of 
> those temp registers?
> 

I'm not sure to follow you.

On ppc32, all volatile registers are used for function arguments, so 
temp registers are necessarily non-volatile so we track them as all 
non-volatile registers we use.

I think saving on stack only the non-volatile registers we use provides 
real benefit, otherwise you wouldn't have implemented it would you ?

Anyway here you should use _R0 instead of tmp_reg.

Christophe
Naveen N. Rao Jan. 7, 2022, 11:46 a.m. UTC | #6
Christophe Leroy wrote:
> 
> 
> Le 04/10/2021 à 20:24, Naveen N. Rao a écrit :
>> Christophe Leroy wrote:
>>>
>>>
>>> Le 01/10/2021 à 23:14, Naveen N. Rao a écrit :
>>>> In some scenarios, it is possible that the program epilogue is outside
>>>> the branch range for a BPF_EXIT instruction. Instead of rejecting such
>>>> programs, emit an indirect branch. We track the size of the bpf program
>>>> emitted after the initial run and do a second pass since BPF_EXIT can
>>>> end up emitting different number of instructions depending on the
>>>> program size.
>>>>
>>>> Suggested-by: Jordan Niethe <jniethe5@gmail.com>
>>>> Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
>>>> ---
>>>>   arch/powerpc/net/bpf_jit.h        |  3 +++
>>>>   arch/powerpc/net/bpf_jit_comp.c   | 22 +++++++++++++++++++++-
>>>>   arch/powerpc/net/bpf_jit_comp32.c |  2 +-
>>>>   arch/powerpc/net/bpf_jit_comp64.c |  2 +-
>>>>   4 files changed, 26 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
>>>> index 89bd744c2bffd4..4023de1698b9f5 100644
>>>> --- a/arch/powerpc/net/bpf_jit.h
>>>> +++ b/arch/powerpc/net/bpf_jit.h
>>>> @@ -126,6 +126,7 @@
>>>>   #define SEEN_FUNC    0x20000000 /* might call external helpers */
>>>>   #define SEEN_TAILCALL    0x40000000 /* uses tail calls */
>>>> +#define SEEN_BIG_PROG    0x80000000 /* large prog, >32MB */
>>>>   #define SEEN_VREG_MASK    0x1ff80000 /* Volatile registers r3-r12 */
>>>>   #define SEEN_NVREG_MASK    0x0003ffff /* Non volatile registers 
>>>> r14-r31 */
>>>> @@ -179,6 +180,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 
>>>> *image, struct codegen_context *
>>>>   void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
>>>>   void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
>>>>   void bpf_jit_realloc_regs(struct codegen_context *ctx);
>>>> +int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx,
>>>> +                    int tmp_reg, unsigned long exit_addr);
>>>>   #endif
>>>> diff --git a/arch/powerpc/net/bpf_jit_comp.c 
>>>> b/arch/powerpc/net/bpf_jit_comp.c
>>>> index fcbf7a917c566e..3204872fbf2738 100644
>>>> --- a/arch/powerpc/net/bpf_jit_comp.c
>>>> +++ b/arch/powerpc/net/bpf_jit_comp.c
>>>> @@ -72,6 +72,21 @@ static int bpf_jit_fixup_subprog_calls(struct 
>>>> bpf_prog *fp, u32 *image,
>>>>       return 0;
>>>>   }
>>>> +int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx,
>>>> +                    int tmp_reg, unsigned long exit_addr)
>>>> +{
>>>> +    if (!(ctx->seen & SEEN_BIG_PROG) && 
>>>> is_offset_in_branch_range(exit_addr)) {
>>>> +        PPC_JMP(exit_addr);
>>>> +    } else {
>>>> +        ctx->seen |= SEEN_BIG_PROG;
>>>> +        PPC_FUNC_ADDR(tmp_reg, (unsigned long)image + exit_addr);
>>>> +        EMIT(PPC_RAW_MTCTR(tmp_reg));
>>>> +        EMIT(PPC_RAW_BCTR());
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>>   struct powerpc64_jit_data {
>>>>       struct bpf_binary_header *header;
>>>>       u32 *addrs;
>>>> @@ -155,12 +170,17 @@ struct bpf_prog *bpf_int_jit_compile(struct 
>>>> bpf_prog *fp)
>>>>           goto out_addrs;
>>>>       }
>>>> +    if (!is_offset_in_branch_range((long)cgctx.idx * 4))
>>>> +        cgctx.seen |= SEEN_BIG_PROG;
>>>> +
>>>>       /*
>>>>        * If we have seen a tail call, we need a second pass.
>>>>        * This is because bpf_jit_emit_common_epilogue() is called
>>>>        * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
>>>> +     * We also need a second pass if we ended up with too large
>>>> +     * a program so as to fix branches.
>>>>        */
>>>> -    if (cgctx.seen & SEEN_TAILCALL) {
>>>> +    if (cgctx.seen & (SEEN_TAILCALL | SEEN_BIG_PROG)) {
>>>>           cgctx.idx = 0;
>>>>           if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
>>>>               fp = org_fp;
>>>> diff --git a/arch/powerpc/net/bpf_jit_comp32.c 
>>>> b/arch/powerpc/net/bpf_jit_comp32.c
>>>> index a74d52204f8da2..d2a67574a23066 100644
>>>> --- a/arch/powerpc/net/bpf_jit_comp32.c
>>>> +++ b/arch/powerpc/net/bpf_jit_comp32.c
>>>> @@ -852,7 +852,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 
>>>> *image, struct codegen_context *
>>>>                * we'll just fall through to the epilogue.
>>>>                */
>>>>               if (i != flen - 1)
>>>> -                PPC_JMP(exit_addr);
>>>> +                bpf_jit_emit_exit_insn(image, ctx, tmp_reg, exit_addr);
>>>
>>> On ppc32, if you use tmp_reg you must flag it. But I think you could 
>>> use r0 instead.
>> 
>> Indeed. Can we drop tracking of the temp registers and using them while
>> remapping registers? Are you seeing significant benefits with re-use of 
>> those temp registers?
>> 
> 
> I'm not sure to follow you.
> 
> On ppc32, all volatile registers are used for function arguments, so 
> temp registers are necessarily non-volatile so we track them as all 
> non-volatile registers we use.
> 
> I think saving on stack only the non-volatile registers we use provides 
> real benefit, otherwise you wouldn't have implemented it would you ?

You're right. I was wary of having to track temporary register usage, 
which is a bit harder and prone to mistakes like the above. A related 
concern was that the register remapping is only used if there are no 
helper calls, which looks like a big limitation.

But, I do agree that it is worth the trouble for ppc32 given the 
register usage.

> 
> Anyway here you should use _R0 instead of tmp_reg.


Thanks,
Naveen
diff mbox series

Patch

diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 89bd744c2bffd4..4023de1698b9f5 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -126,6 +126,7 @@ 
 
 #define SEEN_FUNC	0x20000000 /* might call external helpers */
 #define SEEN_TAILCALL	0x40000000 /* uses tail calls */
+#define SEEN_BIG_PROG	0x80000000 /* large prog, >32MB */
 
 #define SEEN_VREG_MASK	0x1ff80000 /* Volatile registers r3-r12 */
 #define SEEN_NVREG_MASK	0x0003ffff /* Non volatile registers r14-r31 */
@@ -179,6 +180,8 @@  int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
 void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
 void bpf_jit_realloc_regs(struct codegen_context *ctx);
+int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx,
+					int tmp_reg, unsigned long exit_addr);
 
 #endif
 
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index fcbf7a917c566e..3204872fbf2738 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -72,6 +72,21 @@  static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
 	return 0;
 }
 
+int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx,
+					int tmp_reg, unsigned long exit_addr)
+{
+	if (!(ctx->seen & SEEN_BIG_PROG) && is_offset_in_branch_range(exit_addr)) {
+		PPC_JMP(exit_addr);
+	} else {
+		ctx->seen |= SEEN_BIG_PROG;
+		PPC_FUNC_ADDR(tmp_reg, (unsigned long)image + exit_addr);
+		EMIT(PPC_RAW_MTCTR(tmp_reg));
+		EMIT(PPC_RAW_BCTR());
+	}
+
+	return 0;
+}
+
 struct powerpc64_jit_data {
 	struct bpf_binary_header *header;
 	u32 *addrs;
@@ -155,12 +170,17 @@  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		goto out_addrs;
 	}
 
+	if (!is_offset_in_branch_range((long)cgctx.idx * 4))
+		cgctx.seen |= SEEN_BIG_PROG;
+
 	/*
 	 * If we have seen a tail call, we need a second pass.
 	 * This is because bpf_jit_emit_common_epilogue() is called
 	 * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
+	 * We also need a second pass if we ended up with too large
+	 * a program so as to fix branches.
 	 */
-	if (cgctx.seen & SEEN_TAILCALL) {
+	if (cgctx.seen & (SEEN_TAILCALL | SEEN_BIG_PROG)) {
 		cgctx.idx = 0;
 		if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
 			fp = org_fp;
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index a74d52204f8da2..d2a67574a23066 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -852,7 +852,7 @@  int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 			 * we'll just fall through to the epilogue.
 			 */
 			if (i != flen - 1)
-				PPC_JMP(exit_addr);
+				bpf_jit_emit_exit_insn(image, ctx, tmp_reg, exit_addr);
 			/* else fall through to the epilogue */
 			break;
 
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index f06c62089b1457..3351a866ef6207 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -761,7 +761,7 @@  int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 			 * we'll just fall through to the epilogue.
 			 */
 			if (i != flen - 1)
-				PPC_JMP(exit_addr);
+				bpf_jit_emit_exit_insn(image, ctx, b2p[TMP_REG_1], exit_addr);
 			/* else fall through to the epilogue */
 			break;