diff mbox series

[RFC,06/13] x86/um: nommu: process/thread handling

Message ID 88b3cdd8cd1437e45184fd26c6df5d7ceb93de55.1729770373.git.thehajime@gmail.com
State RFC
Headers show
Series [RFC,01/13] fs: binfmt_elf_efpic: add architecture hook elf_arch_finalize_exec | expand

Commit Message

Hajime Tazaki Oct. 24, 2024, 12:09 p.m. UTC
Since the ptrace facility isn't used under !MMU of UML, there is a
different code path to invoke processes/threads: on entry to the syscall
interface, the stack pointer has to be adjusted to handle the vfork(2)
return address, no external process is used, and some registers (the fs
segment register for TLS, etc.) need to be properly configured on every
context switch.

Signals aren't delivered on non-ptrace syscall entry/exit, so we also
need to handle pending signals ourselves.

Signed-off-by: Hajime Tazaki <thehajime@gmail.com>
Signed-off-by: Ricardo Koller <ricarkol@google.com>
---
 arch/um/kernel/exec.c           |  8 ++++++++
 arch/um/kernel/process.c        | 21 ++++++++++++++++++++-
 arch/um/os-Linux/process.c      |  6 ++++++
 arch/um/os-Linux/skas/process.c |  4 ++++
 arch/x86/um/asm/processor.h     | 12 ++++++++++++
 arch/x86/um/entry_64.S          | 22 ++++++++++++++++++++++
 arch/x86/um/syscalls_64.c       | 12 ++++++++++++
 7 files changed, 84 insertions(+), 1 deletion(-)

Comments

Johannes Berg Oct. 25, 2024, 9:22 a.m. UTC | #1
On Thu, 2024-10-24 at 21:09 +0900, Hajime Tazaki wrote:
> Since ptrace facility isn't used under !MMU of UML, there is different
> code path to invoke proceeses/threads; on an entry to the syscall

typo: processes

>  /* Called magically, see new_thread_handler above */
>  static void fork_handler(void)
>  {
> -	schedule_tail(current->thread.prev_sched);
> +	if (current->thread.prev_sched != NULL)

nit: no need for "!= NULL"

> @@ -134,6 +138,21 @@ static void fork_handler(void)
>  
>  	current->thread.prev_sched = NULL;
>  
> +#ifndef CONFIG_MMU
> +	/*
> +	 * This fork can only come from libc's vfork, which
> +	 * does this:
> +	 *	popq %%rdx;
> +	 *	call *%0; // vsyscall
> +	 *	pushq %%rdx;
> +	 * %rdx stores the return address which is stored
> +	 * at pt_regs[HOST_IP] at the moment. We still
> +	 * need to pop the pushed address by "call" though,
> +	 * so this is what this next line does.
> +	 */
> +	if (current->thread.regs.regs.gp[HOST_ORIG_AX] == __NR_vfork)
> +		current->thread.regs.regs.gp[REGS_SP_INDEX] += 8;
> +#endif

Kind of ugly ... but I guess not much choice.

> +#ifndef CONFIG_MMU
> +	current_top_of_stack = task_top_of_stack(to);
> +	current_ptregs = (long)task_pt_regs(to);
> +
> +	if ((to->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)] == 0)
> +	    || (to->mm == NULL))

Put || on the previous line, "!to->mm"

> +		return;
> +
> +	// rkj: this changes the FS on every context switch

Not sure we're allowing C99 comments yet, and there shouldn't be a "rkj"
tag either :)

johannes
Hajime Tazaki Oct. 25, 2024, 12:58 p.m. UTC | #2
On Fri, 25 Oct 2024 18:22:29 +0900,
Johannes Berg wrote:
> 
> On Thu, 2024-10-24 at 21:09 +0900, Hajime Tazaki wrote:
> > Since ptrace facility isn't used under !MMU of UML, there is different
> > code path to invoke proceeses/threads; on an entry to the syscall
> 
> typo: processes

thanks. (I thought checkpatch.pl detects them..)

> >  /* Called magically, see new_thread_handler above */
> >  static void fork_handler(void)
> >  {
> > -	schedule_tail(current->thread.prev_sched);
> > +	if (current->thread.prev_sched != NULL)
> 
> nit: no need for "!= NULL"

will fix it.

> > @@ -134,6 +138,21 @@ static void fork_handler(void)
> >  
> >  	current->thread.prev_sched = NULL;
> >  
> > +#ifndef CONFIG_MMU
> > +	/*
> > +	 * This fork can only come from libc's vfork, which
> > +	 * does this:
> > +	 *	popq %%rdx;
> > +	 *	call *%0; // vsyscall
> > +	 *	pushq %%rdx;
> > +	 * %rdx stores the return address which is stored
> > +	 * at pt_regs[HOST_IP] at the moment. We still
> > +	 * need to pop the pushed address by "call" though,
> > +	 * so this is what this next line does.
> > +	 */
> > +	if (current->thread.regs.regs.gp[HOST_ORIG_AX] == __NR_vfork)
> > +		current->thread.regs.regs.gp[REGS_SP_INDEX] += 8;
> > +#endif
> 
> Kind of ugly ... but I guess not much choice.

(indeed)

> > +#ifndef CONFIG_MMU
> > +	current_top_of_stack = task_top_of_stack(to);
> > +	current_ptregs = (long)task_pt_regs(to);
> > +
> > +	if ((to->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)] == 0)
> > +	    || (to->mm == NULL))
> 
> Put || on the previous line, "!to->mm"

will fix it.

> > +		return;
> > +
> > +	// rkj: this changes the FS on every context switch
> 
> Not sure we're allowing C99 comments yet, and there shouldn't be a "rkj"
> tag either :)

this is my mistake; forgot to remove those private tags.
will fix it.

-- Hajime
diff mbox series

Patch

diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index cb8b5cd9285c..fe7d776a5962 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -24,8 +24,10 @@  void flush_thread(void)
 {
 	arch_flush_thread(&current->thread.arch);
 
+#ifdef CONFIG_MMU
 	get_safe_registers(current_pt_regs()->regs.gp,
 			   current_pt_regs()->regs.fp);
+#endif
 
 	__switch_mm(&current->mm->context.id);
 }
@@ -35,5 +37,11 @@  void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp)
 	PT_REGS_IP(regs) = eip;
 	PT_REGS_SP(regs) = esp;
 	clear_thread_flag(TIF_SINGLESTEP);
+#ifndef CONFIG_MMU
+	current->thread.regs.regs.gp[REGS_IP_INDEX] = eip;
+	current->thread.regs.regs.gp[REGS_SP_INDEX] = esp;
+	new_thread(task_stack_page(current), &current->thread.switch_buf,
+		   (void *)eip);
+#endif
 }
 EXPORT_SYMBOL(start_thread);
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index b1b608afa036..270b5bd476be 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -117,13 +117,17 @@  void new_thread_handler(void)
 	 * callback returns only if the kernel thread execs a process
 	 */
 	fn(arg);
+#ifndef CONFIG_MMU
+	arch_switch_to(current);
+#endif
 	userspace(&current->thread.regs.regs, current_thread_info()->aux_fp_regs);
 }
 
 /* Called magically, see new_thread_handler above */
 static void fork_handler(void)
 {
-	schedule_tail(current->thread.prev_sched);
+	if (current->thread.prev_sched != NULL)
+		schedule_tail(current->thread.prev_sched);
 
 	/*
 	 * XXX: if interrupt_end() calls schedule, this call to
@@ -134,6 +138,21 @@  static void fork_handler(void)
 
 	current->thread.prev_sched = NULL;
 
+#ifndef CONFIG_MMU
+	/*
+	 * This fork can only come from libc's vfork, which
+	 * does this:
+	 *	popq %%rdx;
+	 *	call *%0; // vsyscall
+	 *	pushq %%rdx;
+	 * %rdx stores the return address which is stored
+	 * at pt_regs[HOST_IP] at the moment. We still
+	 * need to pop the pushed address by "call" though,
+	 * so this is what this next line does.
+	 */
+	if (current->thread.regs.regs.gp[HOST_ORIG_AX] == __NR_vfork)
+		current->thread.regs.regs.gp[REGS_SP_INDEX] += 8;
+#endif
 	userspace(&current->thread.regs.regs, current_thread_info()->aux_fp_regs);
 }
 
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index b164873da2db..f08bb20d95ec 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -92,7 +92,10 @@  int os_process_parent(int pid)
 
 void os_alarm_process(int pid)
 {
+/* !CONFIG_MMU doesn't send alarm signal to other processes */
+#ifdef UML_CONFIG_MMU
 	kill(pid, SIGALRM);
+#endif
 }
 
 void os_stop_process(int pid)
@@ -114,11 +117,14 @@  void os_kill_process(int pid, int reap_child)
 
 void os_kill_ptraced_process(int pid, int reap_child)
 {
+/* !CONFIG_MMU doesn't have ptraced process */
+#ifdef UML_CONFIG_MMU
 	kill(pid, SIGKILL);
 	ptrace(PTRACE_KILL, pid);
 	ptrace(PTRACE_CONT, pid);
 	if (reap_child)
 		CATCH_EINTR(waitpid(pid, NULL, __WALL));
+#endif
 }
 
 /* Don't use the glibc version, which caches the result in TLS. It misses some
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index b6f656bcffb1..2a0a20aa59b9 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -141,6 +141,7 @@  void wait_stub_done(int pid)
 
 extern unsigned long current_stub_stack(void);
 
+#ifdef UML_CONFIG_MMU
 static void get_skas_faultinfo(int pid, struct faultinfo *fi, unsigned long *aux_fp_regs)
 {
 	int err;
@@ -186,6 +187,7 @@  static void handle_trap(int pid, struct uml_pt_regs *regs)
 
 	handle_syscall(regs);
 }
+#endif
 
 extern char __syscall_stub_start[];
 
@@ -336,6 +338,7 @@  int start_userspace(unsigned long stub_stack)
 	return err;
 }
 
+#ifdef UML_CONFIG_MMU
 void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
 {
 	int err, status, op, pid = userspace_pid[0];
@@ -472,6 +475,7 @@  void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
 		}
 	}
 }
+#endif /* UML_CONFIG_MMU */
 
 void new_thread(void *stack, jmp_buf *buf, void (*handler)(void))
 {
diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h
index 478710384b34..d88d7d9d5c18 100644
--- a/arch/x86/um/asm/processor.h
+++ b/arch/x86/um/asm/processor.h
@@ -38,6 +38,18 @@  static __always_inline void cpu_relax(void)
 
 #define task_pt_regs(t) (&(t)->thread.regs)
 
+#ifndef CONFIG_MMU
+#define task_top_of_stack(task) \
+({									\
+	unsigned long __ptr = (unsigned long)task->stack;	\
+	__ptr += THREAD_SIZE;			\
+	__ptr;					\
+})
+
+extern long current_top_of_stack;
+extern long current_ptregs;
+#endif
+
 #include <asm/processor-generic.h>
 
 #endif
diff --git a/arch/x86/um/entry_64.S b/arch/x86/um/entry_64.S
index 12e11ac03543..1fd5b3665efa 100644
--- a/arch/x86/um/entry_64.S
+++ b/arch/x86/um/entry_64.S
@@ -86,3 +86,25 @@  ENTRY(__kernel_vsyscall)
 	ret
 
 END(__kernel_vsyscall)
+
+// void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
+ENTRY(userspace)
+	/* align the stack for x86_64 ABI */
+	and     $-0x10, %rsp
+	/* Handle any immediate reschedules or signals */
+	call	interrupt_end
+
+	movq	current_ptregs, %rsp
+
+	POP_REGS
+
+	addq	$8, %rsp	/* skip orig_ax */
+	popq	%r11		/* pt_regs->ip */
+	addq	$8, %rsp	/* skip cs */
+	addq	$8, %rsp	/* skip flags */
+	popq	%rsp
+
+	jmp	*%r11
+
+END(userspace)
+#endif // !CONFIG_MMU
diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c
index 6a00a28c9cca..8abf2a679578 100644
--- a/arch/x86/um/syscalls_64.c
+++ b/arch/x86/um/syscalls_64.c
@@ -51,6 +51,18 @@  void arch_switch_to(struct task_struct *to)
 	 * Nothing needs to be done on x86_64.
 	 * The FS_BASE/GS_BASE registers are saved in the ptrace register set.
 	 */
+#ifndef CONFIG_MMU
+	current_top_of_stack = task_top_of_stack(to);
+	current_ptregs = (long)task_pt_regs(to);
+
+	if ((to->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)] == 0)
+	    || (to->mm == NULL))
+		return;
+
+	// rkj: this changes the FS on every context switch
+	arch_prctl(to, ARCH_SET_FS,
+		   (void __user *) to->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)]);
+#endif
 }
 
 SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,