diff mbox series

[RFC,v2,09/13] x86/um/vdso: nommu: vdso memory update

Message ID a00a498ea9803ac3c6f2b933203a2675d1785208.1731290567.git.thehajime@gmail.com
State RFC
Headers show
Series nommu UML | expand

Commit Message

Hajime Tazaki Nov. 11, 2024, 6:27 a.m. UTC
In !MMU mode, the address of the vdso is directly accessible from
userspace.  This commit implements the entry point by pointing it at the
address of the vdso page block.

This commit also adds a memory permission configuration to make the vdso
page executable.

Signed-off-by: Hajime Tazaki <thehajime@gmail.com>
Signed-off-by: Ricardo Koller <ricarkol@google.com>
---
 arch/x86/um/vdso/um_vdso.c | 20 ++++++++++++++++++++
 arch/x86/um/vdso/vma.c     | 14 ++++++++++++++
 2 files changed, 34 insertions(+)

Comments

Benjamin Berg Nov. 27, 2024, 10:36 a.m. UTC | #1
Hi,

On Mon, 2024-11-11 at 15:27 +0900, Hajime Tazaki wrote:
> On !MMU mode, the address of vdso is accessible from userspace.  This
> commit implements the entry point by pointing a block of page address.
> 
> This commit also add memory permission configuration of vdso page to be
> executable.
> 
> Signed-off-by: Hajime Tazaki <thehajime@gmail.com>
> Signed-off-by: Ricardo Koller <ricarkol@google.com>
> ---
>  arch/x86/um/vdso/um_vdso.c | 20 ++++++++++++++++++++
>  arch/x86/um/vdso/vma.c     | 14 ++++++++++++++
>  2 files changed, 34 insertions(+)
> 
> diff --git a/arch/x86/um/vdso/um_vdso.c b/arch/x86/um/vdso/um_vdso.c
> index cbae2584124f..eff3e6641a0e 100644
> --- a/arch/x86/um/vdso/um_vdso.c
> +++ b/arch/x86/um/vdso/um_vdso.c
> @@ -23,10 +23,17 @@ int __vdso_clock_gettime(clockid_t clock, struct
> __kernel_old_timespec *ts)
>  {
>  	long ret;
>  
> +#ifdef CONFIG_MMU
>  	asm("syscall"
>  		: "=a" (ret)
>  		: "0" (__NR_clock_gettime), "D" (clock), "S" (ts)
>  		: "rcx", "r11", "memory");
> +#else
> +	asm("call *%1"
> +		: "=a" (ret)
> +		: "0" ((unsigned long)__NR_clock_gettime), "D"
> (clock), "S" (ts)
> +		: "rcx", "r11", "memory");
> +#endif
>  
>  	return ret;
>  }
> @@ -37,10 +44,17 @@ int __vdso_gettimeofday(struct
> __kernel_old_timeval *tv, struct timezone *tz)
>  {
>  	long ret;
>  
> +#ifdef CONFIG_MMU
>  	asm("syscall"
>  		: "=a" (ret)
>  		: "0" (__NR_gettimeofday), "D" (tv), "S" (tz)
>  		: "rcx", "r11", "memory");
> +#else
> +	asm("call *%1"
> +		: "=a" (ret)
> +		: "0" ((unsigned long)__NR_gettimeofday), "D" (tv),
> "S" (tz)
> +		: "rcx", "r11", "memory");
> +#endif
>  
>  	return ret;
>  }
> @@ -51,9 +65,15 @@ __kernel_old_time_t
> __vdso_time(__kernel_old_time_t *t)
>  {
>  	long secs;
>  
> +#ifdef CONFIG_MMU
>  	asm volatile("syscall"
>  		: "=a" (secs)
>  		: "0" (__NR_time), "D" (t) : "cc", "r11", "cx",
> "memory");
> +#else
> +	asm("call *%1"
> +		: "=a" (secs)
> +		: "0" ((unsigned long)__NR_time), "D" (t) : "cc",
> "r11", "cx", "memory");
> +#endif

Maybe introduce a macro for "syscall" vs. "call *%1"? The parameters
should be identical in both cases. The "call" could probably even jump
to the end of the NOP ramp directly in this case.

Though maybe I am missing something with the "(unsigned long)" cast?

>  	return secs;
>  }
> diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c
> index f238f7b33cdd..83c861e2a815 100644
> --- a/arch/x86/um/vdso/vma.c
> +++ b/arch/x86/um/vdso/vma.c
> @@ -9,6 +9,7 @@
>  #include <asm/page.h>
>  #include <asm/elf.h>
>  #include <linux/init.h>
> +#include <os.h>
>  
>  static unsigned int __read_mostly vdso_enabled = 1;
>  unsigned long um_vdso_addr;
> @@ -24,7 +25,9 @@ static int __init init_vdso(void)
>  
>  	BUG_ON(vdso_end - vdso_start > PAGE_SIZE);
>  
> +#ifdef CONFIG_MMU
>  	um_vdso_addr = task_size - PAGE_SIZE;
> +#endif
>  
>  	vdsop = kmalloc(sizeof(struct page *), GFP_KERNEL);
>  	if (!vdsop)
> @@ -40,6 +43,15 @@ static int __init init_vdso(void)
>  	copy_page(page_address(um_vdso), vdso_start);
>  	*vdsop = um_vdso;
>  
> +#ifndef CONFIG_MMU
> +	/* this is fine with NOMMU as everything is accessible */
> +	um_vdso_addr = (unsigned long)page_address(um_vdso);
> +	os_protect_memory((void *)um_vdso_addr, vdso_end -
> vdso_start, 1, 1, 1);

I think this should be "1, 0, 1", i.e. we shouldn't enable write
access.

> +	pr_debug("vdso_start=%lx um_vdso_addr=%lx pg_um_vdso=%lx",
> +	       (unsigned long)vdso_start, um_vdso_addr,
> +	       (unsigned long)page_address(um_vdso));
> +#endif
> +
>  	return 0;
>  
>  oom:
> @@ -50,6 +62,7 @@ static int __init init_vdso(void)
>  }
>  subsys_initcall(init_vdso);
>  
> +#ifdef CONFIG_MMU
>  int arch_setup_additional_pages(struct linux_binprm *bprm, int
> uses_interp)
>  {
>  	struct vm_area_struct *vma;
> @@ -74,3 +87,4 @@ int arch_setup_additional_pages(struct linux_binprm
> *bprm, int uses_interp)
>  
>  	return IS_ERR(vma) ? PTR_ERR(vma) : 0;
>  }
> +#endif
Hajime Tazaki Nov. 27, 2024, 11:23 p.m. UTC | #2
Thanks Benjamin,

On Wed, 27 Nov 2024 19:36:44 +0900,
Benjamin Berg wrote:

> > @@ -51,9 +65,15 @@ __kernel_old_time_t
> > __vdso_time(__kernel_old_time_t *t)
> >  {
> >  	long secs;
> >  
> > +#ifdef CONFIG_MMU
> >  	asm volatile("syscall"
> >  		: "=a" (secs)
> >  		: "0" (__NR_time), "D" (t) : "cc", "r11", "cx",
> > "memory");
> > +#else
> > +	asm("call *%1"
> > +		: "=a" (secs)
> > +		: "0" ((unsigned long)__NR_time), "D" (t) : "cc",
> > "r11", "cx", "memory");
> > +#endif
> 
> Maybe introduce a macro for "syscall" vs. "call *%1"? The parameters
> should be identical in both cases.

I think it's nice to clean up this part.

> The "call" could probably even jump
> to the end of the NOP ramp directly in this case.

ah, that would give a bit of a speed up.
I confirmed that it works.  I'll update it if the code readability
doesn't become worse.

> Though maybe I am missing something with the "(unsigned long)" cast?

without this, gcc says

  CC      arch/x86/um/vdso/um_vdso.o
../arch/x86/um/vdso/um_vdso.c: Assembler messages:
../arch/x86/um/vdso/um_vdso.c:50: Error: operand size mismatch for `call'

so the cast was intended to silence it.

but if I changed the constraint like below, the error is gone.

-		: "0" ((unsigned long)__NR_time), "D" (t) : "cc",
+		: "a" (__NR_time), "D" (t) : "cc",

I will clean it up as well.

> >  	return secs;
> >  }
> > diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c
> > index f238f7b33cdd..83c861e2a815 100644
> > --- a/arch/x86/um/vdso/vma.c
> > +++ b/arch/x86/um/vdso/vma.c
> > @@ -9,6 +9,7 @@
> >  #include <asm/page.h>
> >  #include <asm/elf.h>
> >  #include <linux/init.h>
> > +#include <os.h>
> >  
> >  static unsigned int __read_mostly vdso_enabled = 1;
> >  unsigned long um_vdso_addr;
> > @@ -24,7 +25,9 @@ static int __init init_vdso(void)
> >  
> >  	BUG_ON(vdso_end - vdso_start > PAGE_SIZE);
> >  
> > +#ifdef CONFIG_MMU
> >  	um_vdso_addr = task_size - PAGE_SIZE;
> > +#endif
> >  
> >  	vdsop = kmalloc(sizeof(struct page *), GFP_KERNEL);
> >  	if (!vdsop)
> > @@ -40,6 +43,15 @@ static int __init init_vdso(void)
> >  	copy_page(page_address(um_vdso), vdso_start);
> >  	*vdsop = um_vdso;
> >  
> > +#ifndef CONFIG_MMU
> > +	/* this is fine with NOMMU as everything is accessible */
> > +	um_vdso_addr = (unsigned long)page_address(um_vdso);
> > +	os_protect_memory((void *)um_vdso_addr, vdso_end -
> > vdso_start, 1, 1, 1);
> 
> I think this should be "1, 0, 1", i.e. we shouldn't enable write
> access.

not sure if this is related, but with PROT_WRITE off I cannot put a
breakpoint with gdb on the vdso area.  Normal execution (without gdb)
looks fine.


-- Hajime
diff mbox series

Patch

diff --git a/arch/x86/um/vdso/um_vdso.c b/arch/x86/um/vdso/um_vdso.c
index cbae2584124f..eff3e6641a0e 100644
--- a/arch/x86/um/vdso/um_vdso.c
+++ b/arch/x86/um/vdso/um_vdso.c
@@ -23,10 +23,17 @@  int __vdso_clock_gettime(clockid_t clock, struct __kernel_old_timespec *ts)
 {
 	long ret;
 
+#ifdef CONFIG_MMU
 	asm("syscall"
 		: "=a" (ret)
 		: "0" (__NR_clock_gettime), "D" (clock), "S" (ts)
 		: "rcx", "r11", "memory");
+#else
+	asm("call *%1"
+		: "=a" (ret)
+		: "0" ((unsigned long)__NR_clock_gettime), "D" (clock), "S" (ts)
+		: "rcx", "r11", "memory");
+#endif
 
 	return ret;
 }
@@ -37,10 +44,17 @@  int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
 {
 	long ret;
 
+#ifdef CONFIG_MMU
 	asm("syscall"
 		: "=a" (ret)
 		: "0" (__NR_gettimeofday), "D" (tv), "S" (tz)
 		: "rcx", "r11", "memory");
+#else
+	asm("call *%1"
+		: "=a" (ret)
+		: "0" ((unsigned long)__NR_gettimeofday), "D" (tv), "S" (tz)
+		: "rcx", "r11", "memory");
+#endif
 
 	return ret;
 }
@@ -51,9 +65,15 @@  __kernel_old_time_t __vdso_time(__kernel_old_time_t *t)
 {
 	long secs;
 
+#ifdef CONFIG_MMU
 	asm volatile("syscall"
 		: "=a" (secs)
 		: "0" (__NR_time), "D" (t) : "cc", "r11", "cx", "memory");
+#else
+	asm("call *%1"
+		: "=a" (secs)
+		: "0" ((unsigned long)__NR_time), "D" (t) : "cc", "r11", "cx", "memory");
+#endif
 
 	return secs;
 }
diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c
index f238f7b33cdd..83c861e2a815 100644
--- a/arch/x86/um/vdso/vma.c
+++ b/arch/x86/um/vdso/vma.c
@@ -9,6 +9,7 @@ 
 #include <asm/page.h>
 #include <asm/elf.h>
 #include <linux/init.h>
+#include <os.h>
 
 static unsigned int __read_mostly vdso_enabled = 1;
 unsigned long um_vdso_addr;
@@ -24,7 +25,9 @@  static int __init init_vdso(void)
 
 	BUG_ON(vdso_end - vdso_start > PAGE_SIZE);
 
+#ifdef CONFIG_MMU
 	um_vdso_addr = task_size - PAGE_SIZE;
+#endif
 
 	vdsop = kmalloc(sizeof(struct page *), GFP_KERNEL);
 	if (!vdsop)
@@ -40,6 +43,15 @@  static int __init init_vdso(void)
 	copy_page(page_address(um_vdso), vdso_start);
 	*vdsop = um_vdso;
 
+#ifndef CONFIG_MMU
+	/* this is fine with NOMMU as everything is accessible */
+	um_vdso_addr = (unsigned long)page_address(um_vdso);
+	os_protect_memory((void *)um_vdso_addr, vdso_end - vdso_start, 1, 1, 1);
+	pr_debug("vdso_start=%lx um_vdso_addr=%lx pg_um_vdso=%lx",
+	       (unsigned long)vdso_start, um_vdso_addr,
+	       (unsigned long)page_address(um_vdso));
+#endif
+
 	return 0;
 
 oom:
@@ -50,6 +62,7 @@  static int __init init_vdso(void)
 }
 subsys_initcall(init_vdso);
 
+#ifdef CONFIG_MMU
 int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
 	struct vm_area_struct *vma;
@@ -74,3 +87,4 @@  int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 
 	return IS_ERR(vma) ? PTR_ERR(vma) : 0;
 }
+#endif