diff mbox series

powerpc: Improve the inline asm for syscall wrappers

Message ID 75862e34-5f8c-4bdb-90ed-cc5eaf46f0b5@linux.ibm.com
State New
Headers show
Series powerpc: Improve the inline asm for syscall wrappers | expand

Commit Message

Peter Bergner Nov. 5, 2024, 10:54 p.m. UTC
Hi all,

I wrote the below patch to rewrite powerpc's syscall wrappers so they use
the more modern register constraint usage in Adhemerval's vsyscall wrapper.
The old code was written before the "+r" constraint support was added to GCC.
I also modified the code so we use the faster mfocrf instruction for Power4
and later, rather than the microcoded mfcr instruction which is pretty slow.
I tested this on powerpc64le-linux with --with-cpu={power8,power9,power10}
builds and didn't see any testsuite regressions.  Does anyone have any
thoughts on the patch?  If no one finds anything objectionable or obviously
wrong with it, I'd like to push this fairly soon.

Peter



powerpc: Improve the inline asm for syscall wrappers

Update the inline asm syscall wrappers to match the newer register constraint
usage in INTERNAL_VSYSCALL_CALL_TYPE.  Use the faster mfocrf instruction when
available, rather than the slower mfcr microcoded instruction.

---
 sysdeps/unix/sysv/linux/powerpc/sysdep.h | 42 +++++++++++++-----------
 1 file changed, 22 insertions(+), 20 deletions(-)

Comments

Peter Bergner Nov. 19, 2024, 7:12 p.m. UTC | #1
On 11/5/24 4:54 PM, Peter Bergner wrote:
> powerpc: Improve the inline asm for syscall wrappers
> 
> Update the inline asm syscall wrappers to match the newer register constraint
> usage in INTERNAL_VSYSCALL_CALL_TYPE.  Use the faster mfocrf instruction when
> available, rather than the slower mfcr microcoded instruction.

With no objections, I have pushed this.

Peter
diff mbox series

Patch

diff --git a/sysdeps/unix/sysv/linux/powerpc/sysdep.h b/sysdeps/unix/sysv/linux/powerpc/sysdep.h
index a69b7db338..99df985681 100644
--- a/sysdeps/unix/sysv/linux/powerpc/sysdep.h
+++ b/sysdeps/unix/sysv/linux/powerpc/sysdep.h
@@ -32,6 +32,18 @@ 
 #undef SYS_ify
 #define SYS_ify(syscall_name)  __NR_##syscall_name
 
+#define tostring(s) #s
+#define stringify(s) tostring(s)
+
+#ifdef _ARCH_PWR4
+/* Power4 and later cpus introduced a faster instruction to copy one
+   CR field, rather than the slower microcoded mfcr which copies all
+   CR fields.  */
+# define MFCR0(REG) "mfocrf " stringify(REG) ",0x80"
+#else
+# define MFCR0(REG) "mfcr " stringify(REG)
+#endif
+
 /* Define a macro which expands inline into the wrapper code for a system
    call. This use is for internal calls that do not need to handle errors
    normally. It will never touch errno. This returns just what the kernel
@@ -52,7 +64,7 @@ 
     __asm__ __volatile__						\
       ("mtctr %0\n\t"							\
        "bctrl\n\t"							\
-       "mfcr  %0\n\t"							\
+       MFCR0(%0) "\n\t"							\
        "0:"								\
        : "+r" (r0), "+r" (r3), "+r" (r4), "+r" (r5),  "+r" (r6),        \
          "+r" (r7), "+r" (r8)						\
@@ -83,11 +95,10 @@ 
        "scv 0\n\t"				\
        ".machine \"pop\"\n\t"			\
        "0:"					\
-       : "=&r" (r0),				\
-	 "=&r" (r3), "=&r" (r4), "=&r" (r5),	\
-	 "=&r" (r6), "=&r" (r7), "=&r" (r8)	\
-       : ASM_INPUT_##nr			\
-       : "r9", "r10", "r11", "r12",		\
+       : "+r" (r0),				\
+	 "+r" (r3), "+r" (r4), "+r" (r5),	\
+	 "+r" (r6), "+r" (r7), "+r" (r8)	\
+       : : "r9", "r10", "r11", "r12",		\
 	 "cr0", "cr1", "cr5", "cr6", "cr7",	\
 	 "xer", "lr", "ctr", "memory"); 	\
     r3;					\
@@ -97,13 +108,12 @@ 
   ({						\
     __asm__ __volatile__			\
       ("sc\n\t"				\
-       "mfcr %0\n\t"				\
+       MFCR0(%0) "\n\t"				\
        "0:"					\
-       : "=&r" (r0),				\
-	 "=&r" (r3), "=&r" (r4), "=&r" (r5),	\
-	 "=&r" (r6), "=&r" (r7), "=&r" (r8)	\
-       : ASM_INPUT_##nr			\
-       : "r9", "r10", "r11", "r12",		\
+       : "+r" (r0),				\
+	 "+r" (r3), "+r" (r4), "+r" (r5),	\
+	 "+r" (r6), "+r" (r7), "+r" (r8)	\
+       : : "r9", "r10", "r11", "r12",		\
 	 "xer", "cr0", "ctr", "memory");	\
     r0 & (1 << 28) ? -r3 : r3;			\
   })
@@ -199,14 +209,6 @@ 
 	  __illegally_sized_syscall_arg6 (); \
 	r8 = _arg6
 
-#define ASM_INPUT_0 "0" (r0)
-#define ASM_INPUT_1 ASM_INPUT_0, "1" (r3)
-#define ASM_INPUT_2 ASM_INPUT_1, "2" (r4)
-#define ASM_INPUT_3 ASM_INPUT_2, "3" (r5)
-#define ASM_INPUT_4 ASM_INPUT_3, "4" (r6)
-#define ASM_INPUT_5 ASM_INPUT_4, "5" (r7)
-#define ASM_INPUT_6 ASM_INPUT_5, "6" (r8)
-
 /* List of system calls which are supported as vsyscalls.  */
 #define VDSO_NAME  "LINUX_2.6.15"
 #define VDSO_HASH  123718565