diff mbox series

[bpf-next,1/5] bpf, x64: use %rcx instead of %rax for tail call retpolines

Message ID 20200715233634.3868-2-maciej.fijalkowski@intel.com
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series bpf: tailcalls in BPF subprograms | expand

Commit Message

Maciej Fijalkowski July 15, 2020, 11:36 p.m. UTC
Currently, %rax is used to store the jump target when BPF program is
emitting the retpoline instructions that are handling the indirect
tailcall.

There is a plan to use %rax for different purpose, which is storing the
tail call counter. In order to preserve this value across the tailcalls,
use %rcx instead for jump target storage in retpoline instructions.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
---
 arch/x86/include/asm/nospec-branch.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

Comments

Daniel Borkmann July 16, 2020, 8:36 p.m. UTC | #1
On 7/16/20 1:36 AM, Maciej Fijalkowski wrote:
> Currently, %rax is used to store the jump target when BPF program is
> emitting the retpoline instructions that are handling the indirect
> tailcall.
> 
> There is a plan to use %rax for different purpose, which is storing the
> tail call counter. In order to preserve this value across the tailcalls,
> use %rcx instead for jump target storage in retpoline instructions.
> 
> Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> ---
>   arch/x86/include/asm/nospec-branch.h | 16 ++++++++--------
>   1 file changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
> index e7752b4038ff..e491c3d9f227 100644
> --- a/arch/x86/include/asm/nospec-branch.h
> +++ b/arch/x86/include/asm/nospec-branch.h
> @@ -314,19 +314,19 @@ static inline void mds_idle_clear_cpu_buffers(void)
>    *    lfence
>    *    jmp spec_trap
>    *  do_rop:
> - *    mov %rax,(%rsp) for x86_64
> + *    mov %rcx,(%rsp) for x86_64
>    *    mov %edx,(%esp) for x86_32
>    *    retq
>    *
>    * Without retpolines configured:
>    *
> - *    jmp *%rax for x86_64
> + *    jmp *%rcx for x86_64
>    *    jmp *%edx for x86_32
>    */
>   #ifdef CONFIG_RETPOLINE
>   # ifdef CONFIG_X86_64
> -#  define RETPOLINE_RAX_BPF_JIT_SIZE	17
> -#  define RETPOLINE_RAX_BPF_JIT()				\
> +#  define RETPOLINE_RCX_BPF_JIT_SIZE	17
> +#  define RETPOLINE_RCX_BPF_JIT()				\
>   do {								\
>   	EMIT1_off32(0xE8, 7);	 /* callq do_rop */		\
>   	/* spec_trap: */					\
> @@ -334,7 +334,7 @@ do {								\
>   	EMIT3(0x0F, 0xAE, 0xE8); /* lfence */			\
>   	EMIT2(0xEB, 0xF9);       /* jmp spec_trap */		\
>   	/* do_rop: */						\
> -	EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */	\
> +	EMIT4(0x48, 0x89, 0x0C, 0x24); /* mov %rcx,(%rsp) */	\
>   	EMIT1(0xC3);             /* retq */			\
>   } while (0)
>   # else /* !CONFIG_X86_64 */
> @@ -352,9 +352,9 @@ do {								\
>   # endif
>   #else /* !CONFIG_RETPOLINE */
>   # ifdef CONFIG_X86_64
> -#  define RETPOLINE_RAX_BPF_JIT_SIZE	2
> -#  define RETPOLINE_RAX_BPF_JIT()				\
> -	EMIT2(0xFF, 0xE0);       /* jmp *%rax */
> +#  define RETPOLINE_RCX_BPF_JIT_SIZE	2
> +#  define RETPOLINE_RCX_BPF_JIT()				\
> +	EMIT2(0xFF, 0xE1);       /* jmp *%rcx */

Hmm, so the target prog gets loaded into rax in emit_bpf_tail_call_indirect()
but then you jump into rcx? What am I missing? This still needs to be bisectable.

>   # else /* !CONFIG_X86_64 */
>   #  define RETPOLINE_EDX_BPF_JIT()				\
>   	EMIT2(0xFF, 0xE2)        /* jmp *%edx */
>
Maciej Fijalkowski July 17, 2020, 9:29 a.m. UTC | #2
On Thu, Jul 16, 2020 at 10:37 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 7/16/20 1:36 AM, Maciej Fijalkowski wrote:
> > Currently, %rax is used to store the jump target when BPF program is
> > emitting the retpoline instructions that are handling the indirect
> > tailcall.
> >
> > There is a plan to use %rax for different purpose, which is storing the
> > tail call counter. In order to preserve this value across the tailcalls,
> > use %rcx instead for jump target storage in retpoline instructions.
> >
> > Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> > ---
> >   arch/x86/include/asm/nospec-branch.h | 16 ++++++++--------
> >   1 file changed, 8 insertions(+), 8 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
> > index e7752b4038ff..e491c3d9f227 100644
> > --- a/arch/x86/include/asm/nospec-branch.h
> > +++ b/arch/x86/include/asm/nospec-branch.h
> > @@ -314,19 +314,19 @@ static inline void mds_idle_clear_cpu_buffers(void)
> >    *    lfence
> >    *    jmp spec_trap
> >    *  do_rop:
> > - *    mov %rax,(%rsp) for x86_64
> > + *    mov %rcx,(%rsp) for x86_64
> >    *    mov %edx,(%esp) for x86_32
> >    *    retq
> >    *
> >    * Without retpolines configured:
> >    *
> > - *    jmp *%rax for x86_64
> > + *    jmp *%rcx for x86_64
> >    *    jmp *%edx for x86_32
> >    */
> >   #ifdef CONFIG_RETPOLINE
> >   # ifdef CONFIG_X86_64
> > -#  define RETPOLINE_RAX_BPF_JIT_SIZE 17
> > -#  define RETPOLINE_RAX_BPF_JIT()                            \
> > +#  define RETPOLINE_RCX_BPF_JIT_SIZE 17
> > +#  define RETPOLINE_RCX_BPF_JIT()                            \
> >   do {                                                                \
> >       EMIT1_off32(0xE8, 7);    /* callq do_rop */             \
> >       /* spec_trap: */                                        \
> > @@ -334,7 +334,7 @@ do {                                                              \
> >       EMIT3(0x0F, 0xAE, 0xE8); /* lfence */                   \
> >       EMIT2(0xEB, 0xF9);       /* jmp spec_trap */            \
> >       /* do_rop: */                                           \
> > -     EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */    \
> > +     EMIT4(0x48, 0x89, 0x0C, 0x24); /* mov %rcx,(%rsp) */    \
> >       EMIT1(0xC3);             /* retq */                     \
> >   } while (0)
> >   # else /* !CONFIG_X86_64 */
> > @@ -352,9 +352,9 @@ do {                                                              \
> >   # endif
> >   #else /* !CONFIG_RETPOLINE */
> >   # ifdef CONFIG_X86_64
> > -#  define RETPOLINE_RAX_BPF_JIT_SIZE 2
> > -#  define RETPOLINE_RAX_BPF_JIT()                            \
> > -     EMIT2(0xFF, 0xE0);       /* jmp *%rax */
> > +#  define RETPOLINE_RCX_BPF_JIT_SIZE 2
> > +#  define RETPOLINE_RCX_BPF_JIT()                            \
> > +     EMIT2(0xFF, 0xE1);       /* jmp *%rcx */
>
> Hmm, so the target prog gets loaded into rax in emit_bpf_tail_call_indirect()
> but then you jump into rcx? What am I missing? This still needs to be bisectable.

Somehow your comments on patches 1, 2 and 3 didn't arrive to my work mail.
I'm responding from web-gmail as my client seems to be broken and I am
in a bit of hurry, so apologize for any inconveniences...

You are right of course, I will include the JIT change in this patch on v2.

>
> >   # else /* !CONFIG_X86_64 */
> >   #  define RETPOLINE_EDX_BPF_JIT()                           \
> >       EMIT2(0xFF, 0xE2)        /* jmp *%edx */
> >
>
diff mbox series

Patch

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index e7752b4038ff..e491c3d9f227 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -314,19 +314,19 @@  static inline void mds_idle_clear_cpu_buffers(void)
  *    lfence
  *    jmp spec_trap
  *  do_rop:
- *    mov %rax,(%rsp) for x86_64
+ *    mov %rcx,(%rsp) for x86_64
  *    mov %edx,(%esp) for x86_32
  *    retq
  *
  * Without retpolines configured:
  *
- *    jmp *%rax for x86_64
+ *    jmp *%rcx for x86_64
  *    jmp *%edx for x86_32
  */
 #ifdef CONFIG_RETPOLINE
 # ifdef CONFIG_X86_64
-#  define RETPOLINE_RAX_BPF_JIT_SIZE	17
-#  define RETPOLINE_RAX_BPF_JIT()				\
+#  define RETPOLINE_RCX_BPF_JIT_SIZE	17
+#  define RETPOLINE_RCX_BPF_JIT()				\
 do {								\
 	EMIT1_off32(0xE8, 7);	 /* callq do_rop */		\
 	/* spec_trap: */					\
@@ -334,7 +334,7 @@  do {								\
 	EMIT3(0x0F, 0xAE, 0xE8); /* lfence */			\
 	EMIT2(0xEB, 0xF9);       /* jmp spec_trap */		\
 	/* do_rop: */						\
-	EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */	\
+	EMIT4(0x48, 0x89, 0x0C, 0x24); /* mov %rcx,(%rsp) */	\
 	EMIT1(0xC3);             /* retq */			\
 } while (0)
 # else /* !CONFIG_X86_64 */
@@ -352,9 +352,9 @@  do {								\
 # endif
 #else /* !CONFIG_RETPOLINE */
 # ifdef CONFIG_X86_64
-#  define RETPOLINE_RAX_BPF_JIT_SIZE	2
-#  define RETPOLINE_RAX_BPF_JIT()				\
-	EMIT2(0xFF, 0xE0);       /* jmp *%rax */
+#  define RETPOLINE_RCX_BPF_JIT_SIZE	2
+#  define RETPOLINE_RCX_BPF_JIT()				\
+	EMIT2(0xFF, 0xE1);       /* jmp *%rcx */
 # else /* !CONFIG_X86_64 */
 #  define RETPOLINE_EDX_BPF_JIT()				\
 	EMIT2(0xFF, 0xE2)        /* jmp *%edx */