diff mbox series

[RFC,1/3] powerpc/bpf: implement bpf_arch_text_copy

Message ID 20221110184303.393179-2-hbathini@linux.ibm.com (mailing list archive)
State Superseded
Headers show
Series enable bpf_prog_pack allocator for powerpc | expand

Commit Message

Hari Bathini Nov. 10, 2022, 6:43 p.m. UTC
bpf_arch_text_copy is used to dump the JITed binary to an RX page,
allowing multiple BPF programs to share the same page. Use
patch_instruction() to implement it.

Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---
 arch/powerpc/net/bpf_jit_comp.c | 39 ++++++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

Comments

Christophe Leroy Nov. 13, 2022, 1:17 p.m. UTC | #1
Le 10/11/2022 à 19:43, Hari Bathini a écrit :
> bpf_arch_text_copy is used to dump JITed binary to RX page, allowing
> multiple BPF programs to share the same page. Using patch_instruction
> to implement it.

Using patch_instruction() is nice for a quick implementation, but it is
probably suboptimal. Given the amount of data to be copied, it is worth
writing a dedicated function that maps a RW copy of the page to be
updated, does the whole copy at once with memcpy(), then unmaps the page.

> 
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>   arch/powerpc/net/bpf_jit_comp.c | 39 ++++++++++++++++++++++++++++++++-
>   1 file changed, 38 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index 43e634126514..7383e0effad2 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -13,9 +13,12 @@
>   #include <linux/netdevice.h>
>   #include <linux/filter.h>
>   #include <linux/if_vlan.h>
> -#include <asm/kprobes.h>
> +#include <linux/memory.h>
>   #include <linux/bpf.h>
>   
> +#include <asm/kprobes.h>
> +#include <asm/code-patching.h>
> +
>   #include "bpf_jit.h"
>   
>   static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
> @@ -23,6 +26,35 @@ static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
>   	memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
>   }
>   
> +/*
> + * Patch 'len' bytes of instructions from opcode to addr, one instruction
> + * at a time. Returns addr on success. ERR_PTR(-EINVAL), otherwise.
> + */
> +static void *bpf_patch_instructions(void *addr, void *opcode, size_t len)
> +{
> +	void *ret = ERR_PTR(-EINVAL);
> +	size_t patched = 0;
> +	u32 *inst = opcode;
> +	u32 *start = addr;
> +
> +	if (WARN_ON_ONCE(core_kernel_text((unsigned long)addr)))
> +		return ret;
> +
> +	mutex_lock(&text_mutex);
> +	while (patched < len) {
> +		if (patch_instruction(start++, ppc_inst(*inst)))
> +			goto error;
> +
> +		inst++;
> +		patched += 4;
> +	}
> +
> +	ret = addr;
> +error:
> +	mutex_unlock(&text_mutex);
> +	return ret;
> +}
> +
>   /* Fix updated addresses (for subprog calls, ldimm64, et al) during extra pass */
>   static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image,
>   				   struct codegen_context *ctx, u32 *addrs)
> @@ -357,3 +389,8 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
>   	ctx->exentry_idx++;
>   	return 0;
>   }
> +
> +void *bpf_arch_text_copy(void *dst, void *src, size_t len)
> +{
> +	return bpf_patch_instructions(dst, src, len);
> +}

I can't see the added value of having two functions when
bpf_arch_text_copy() just calls bpf_patch_instructions() and is its only
user. Why not implement the body of bpf_patch_instructions() directly
inside bpf_arch_text_copy()?

That said, having two functions can be nice if they are split
differently, for example so that the goto and the error label go away.

I also prefer using for loops instead of while loops.

It could have looked like below (untested):

/*
 * Patch the instructions found at 'opcode' into 'addr', one 32-bit word
 * per patch_instruction() call, until 'len' bytes have been covered.
 *
 * Returns addr on success, ERR_PTR(-EINVAL) as soon as one
 * patch_instruction() call reports a failure.
 */
static void *bpf_patch_instructions(void *addr, void *opcode, size_t len)
{
	u32 *insn = opcode;
	u32 *limit = addr + len;
	u32 *dest;

	for (dest = addr; dest < limit; dest++, insn++) {
		if (patch_instruction(dest, ppc_inst(*insn)))
			return ERR_PTR(-EINVAL);
	}

	return addr;
}

void *bpf_arch_text_copy(void *dst, void *src, size_t len)
{
	if (WARN_ON_ONCE(core_kernel_text((unsigned long)dst)))
		return ret;

	mutex_lock(&text_mutex);

	ret = bpf_patch_instructions(dst, src, len);

	mutex_unlock(&text_mutex);

	return ret;
}
Hari Bathini Nov. 14, 2022, 2:54 p.m. UTC | #2
On 13/11/22 6:47 pm, Christophe Leroy wrote:
> Le 10/11/2022 à 19:43, Hari Bathini a écrit :
>> bpf_arch_text_copy is used to dump JITed binary to RX page, allowing
>> multiple BPF programs to share the same page. Using patch_instruction
>> to implement it.
> 
> Using patch_instruction() is nice for a quick implementation, but it is
> probably suboptimal. Due to the amount of data to be copied, it is worth

Yeah.

> a dedicated function that maps a RW copy of the page to be updated then
> does the copy at once with memcpy() then unmaps the page.

I will see if I can come up with such an implementation for the respin.

> 
>>
>> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
>> ---
>>    arch/powerpc/net/bpf_jit_comp.c | 39 ++++++++++++++++++++++++++++++++-
>>    1 file changed, 38 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
>> index 43e634126514..7383e0effad2 100644
>> --- a/arch/powerpc/net/bpf_jit_comp.c
>> +++ b/arch/powerpc/net/bpf_jit_comp.c
>> @@ -13,9 +13,12 @@
>>    #include <linux/netdevice.h>
>>    #include <linux/filter.h>
>>    #include <linux/if_vlan.h>
>> -#include <asm/kprobes.h>
>> +#include <linux/memory.h>
>>    #include <linux/bpf.h>
>>    
>> +#include <asm/kprobes.h>
>> +#include <asm/code-patching.h>
>> +
>>    #include "bpf_jit.h"
>>    
>>    static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
>> @@ -23,6 +26,35 @@ static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
>>    	memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
>>    }
>>    
>> +/*
>> + * Patch 'len' bytes of instructions from opcode to addr, one instruction
>> + * at a time. Returns addr on success. ERR_PTR(-EINVAL), otherwise.
>> + */
>> +static void *bpf_patch_instructions(void *addr, void *opcode, size_t len)
>> +{
>> +	void *ret = ERR_PTR(-EINVAL);
>> +	size_t patched = 0;
>> +	u32 *inst = opcode;
>> +	u32 *start = addr;
>> +
>> +	if (WARN_ON_ONCE(core_kernel_text((unsigned long)addr)))
>> +		return ret;
>> +
>> +	mutex_lock(&text_mutex);
>> +	while (patched < len) {
>> +		if (patch_instruction(start++, ppc_inst(*inst)))
>> +			goto error;
>> +
>> +		inst++;
>> +		patched += 4;
>> +	}
>> +
>> +	ret = addr;
>> +error:
>> +	mutex_unlock(&text_mutex);
>> +	return ret;
>> +}
>> +
>>    /* Fix updated addresses (for subprog calls, ldimm64, et al) during extra pass */
>>    static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image,
>>    				   struct codegen_context *ctx, u32 *addrs)
>> @@ -357,3 +389,8 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
>>    	ctx->exentry_idx++;
>>    	return 0;
>>    }
>> +
>> +void *bpf_arch_text_copy(void *dst, void *src, size_t len)
>> +{
>> +	return bpf_patch_instructions(dst, src, len);
>> +}
> 
> I can't see the added value of having two functions when the first one
> just calls the second one and is the only user of it. Why not have
> implemented bpf_patch_instructions() directly inside bpf_arch_text_copy() ?
> 
> By the way, it can be nice to have two functions, but split them
> differently, to avoid the goto: etc ....
> 
> I also prefer using for loops instead of while loops.
> 

> It could have looked like below (untested):
> 
> static void *bpf_patch_instructions(void *addr, void *opcode, size_t len)
> {
> 	u32 *inst = opcode;
> 	u32 *start = addr;
> 	u32 *end = addr + len;
> 
> 	for (inst = opcode, start = addr; start < end; inst++, start++) {
> 		if (patch_instruction(start, ppc_inst(*inst)))
> 			return ERR_PTR(-EINVAL);
> 	}
> 
> 	return addr;
> }
> 
> void *bpf_arch_text_copy(void *dst, void *src, size_t len)
> {
> 	if (WARN_ON_ONCE(core_kernel_text((unsigned long)dst)))
> 		return ret;
> 
> 	mutex_lock(&text_mutex);
> 
> 	ret = bpf_patch_instructions(dst, src, len);
> 
> 	mutex_unlock(&text_mutex);
> 
> 	return ret;
> }
> 
> 

Sure. Will use this.

Thanks
Hari
diff mbox series

Patch

diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 43e634126514..7383e0effad2 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -13,9 +13,12 @@ 
 #include <linux/netdevice.h>
 #include <linux/filter.h>
 #include <linux/if_vlan.h>
-#include <asm/kprobes.h>
+#include <linux/memory.h>
 #include <linux/bpf.h>
 
+#include <asm/kprobes.h>
+#include <asm/code-patching.h>
+
 #include "bpf_jit.h"
 
 static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
@@ -23,6 +26,35 @@  static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
 	memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
 }
 
+/*
+ * Patch 'len' bytes of instructions from opcode to addr, one instruction
+ * at a time. Returns addr on success, ERR_PTR(-EINVAL) otherwise.
+ */
+static void *bpf_patch_instructions(void *addr, void *opcode, size_t len)
+{
+	void *ret = ERR_PTR(-EINVAL);
+	size_t patched = 0;
+	u32 *inst = opcode;
+	u32 *start = addr;
+
+	if (WARN_ON_ONCE(core_kernel_text((unsigned long)addr)))
+		return ret;
+
+	mutex_lock(&text_mutex);
+	while (patched < len) {
+		if (patch_instruction(start++, ppc_inst(*inst)))
+			goto error;
+
+		inst++;
+		patched += 4;
+	}
+
+	ret = addr;
+error:
+	mutex_unlock(&text_mutex);
+	return ret;
+}
+
 /* Fix updated addresses (for subprog calls, ldimm64, et al) during extra pass */
 static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image,
 				   struct codegen_context *ctx, u32 *addrs)
@@ -357,3 +389,8 @@  int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
 	ctx->exentry_idx++;
 	return 0;
 }
+
+void *bpf_arch_text_copy(void *dst, void *src, size_t len)
+{
+	return bpf_patch_instructions(dst, src, len);
+}