diff mbox series

[2/6] LoongArch: Add ifunc support for memchr{aligned, lsx, lasx}

Message ID 20230828072651.3085034-3-dengjianbo@loongson.cn
State New
Headers show
Series LoongArch: Add ifunc support for {raw}memchr, | expand

Commit Message

dengjianbo Aug. 28, 2023, 7:26 a.m. UTC
According to glibc memchr microbenchmark, this implementation could reduce
the runtime as following:

Name               Percent of runtime reduced
memchr-lasx        37%-83%
memchr-lsx         30%-66%
memchr-aligned     0%-15%
---
 sysdeps/loongarch/lp64/multiarch/Makefile     |   3 +
 .../lp64/multiarch/ifunc-impl-list.c          |   7 ++
 .../loongarch/lp64/multiarch/ifunc-memchr.h   |  40 ++++++
 .../loongarch/lp64/multiarch/memchr-aligned.S |  95 ++++++++++++++
 .../loongarch/lp64/multiarch/memchr-lasx.S    | 117 ++++++++++++++++++
 sysdeps/loongarch/lp64/multiarch/memchr-lsx.S | 102 +++++++++++++++
 sysdeps/loongarch/lp64/multiarch/memchr.c     |  37 ++++++
 7 files changed, 401 insertions(+)
 create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h
 create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-aligned.S
 create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-lasx.S
 create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-lsx.S
 create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr.c
diff mbox series

Patch

diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index 64416b025a..2f4802cfa4 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -24,5 +24,8 @@  sysdep_routines += \
   rawmemchr-aligned \
   rawmemchr-lsx \
   rawmemchr-lasx \
+  memchr-aligned \
+  memchr-lsx \
+  memchr-lasx \
 # sysdep_routines
 endif
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index 3db9af1460..a567b9cf4d 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -102,5 +102,12 @@  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_aligned)
 	      )
 
+  IFUNC_IMPL (i, name, memchr,
+#if !defined __loongarch_soft_float
+	      IFUNC_IMPL_ADD (array, i, memchr, SUPPORT_LASX, __memchr_lasx)
+	      IFUNC_IMPL_ADD (array, i, memchr, SUPPORT_LSX, __memchr_lsx)
+#endif
+	      IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_aligned)
+	      )
   return i;
 }
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h
new file mode 100644
index 0000000000..9060ccd54d
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h
@@ -0,0 +1,40 @@ 
+/* Common definition for memchr ifunc selections.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+  if (SUPPORT_LASX)
+    return OPTIMIZE (lasx);
+  else if (SUPPORT_LSX)
+    return OPTIMIZE (lsx);
+  else
+#endif
+    return OPTIMIZE (aligned);
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S
new file mode 100644
index 0000000000..81d0d00461
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S
@@ -0,0 +1,95 @@ 
+/* Optimized memchr implementation using basic LoongArch instructions.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define MEMCHR_NAME __memchr_aligned
+#else
+# define MEMCHR_NAME memchr
+#endif
+
+LEAF(MEMCHR_NAME, 6)
+    beqz        a2, L(out)
+    andi        t1, a0, 0x7
+    add.d       a5, a0, a2
+    bstrins.d   a0, zero, 2, 0
+
+    ld.d        t0, a0, 0
+    bstrins.d   a1, a1, 15, 8
+    lu12i.w     a3, 0x01010
+    slli.d      t2, t1, 03
+
+    bstrins.d   a1, a1, 31, 16
+    ori         a3, a3, 0x101
+    li.d        t7, -1
+    li.d        t8, 8
+
+    bstrins.d   a1, a1, 63, 32
+    bstrins.d   a3, a3, 63, 32
+    sll.d       t2, t7, t2
+    xor         t0, t0, a1
+
+
+    addi.d      a6, a5, -1
+    slli.d      a4, a3, 7
+    sub.d       t1, t8, t1
+    orn         t0, t0, t2
+
+    sub.d       t2, t0, a3
+    andn        t3, a4, t0
+    bstrins.d   a6, zero, 2, 0
+    and         t0, t2, t3
+
+    bgeu        t1, a2, L(end)
+L(loop):
+    bnez        t0, L(found)
+    ld.d        t1, a0, 8
+    xor         t0, t1, a1
+
+    addi.d      a0, a0, 8
+    sub.d       t2, t0, a3
+    andn        t3, a4, t0
+    and         t0, t2, t3
+
+
+    bne         a0, a6, L(loop)
+L(end):
+    sub.d       t1, a5, a6
+    ctz.d       t0, t0
+    srli.d      t0, t0, 3
+
+    sltu        t1, t0, t1
+    add.d       a0, a0, t0
+    maskeqz     a0, a0, t1
+    jr          ra
+
+L(found):
+    ctz.d       t0, t0
+    srli.d      t0, t0, 3
+    add.d       a0, a0, t0
+    jr          ra
+
+L(out):
+    move        a0, zero
+    jr          ra
+END(MEMCHR_NAME)
+
+libc_hidden_builtin_def (MEMCHR_NAME)
diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S
new file mode 100644
index 0000000000..a26cdf48b5
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S
@@ -0,0 +1,117 @@ 
+/* Optimized memchr implementation using LoongArch LASX instructions.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define MEMCHR __memchr_lasx
+
+LEAF(MEMCHR, 6)
+    beqz            a2, L(ret0)
+    add.d           a3, a0, a2
+    andi            t0, a0, 0x3f
+    bstrins.d       a0, zero, 5, 0
+
+    xvld            xr0, a0, 0
+    xvld            xr1, a0, 32
+    li.d            t1, -1
+    li.d            t2, 64
+
+    xvreplgr2vr.b   xr2, a1
+    sll.d           t3, t1, t0
+    sub.d           t2, t2, t0
+    xvseq.b         xr0, xr0, xr2
+
+    xvseq.b         xr1, xr1, xr2
+    xvmsknz.b       xr0, xr0
+    xvmsknz.b       xr1, xr1
+    xvpickve.w      xr3, xr0, 4
+
+
+    xvpickve.w      xr4, xr1, 4
+    vilvl.h         vr0, vr3, vr0
+    vilvl.h         vr1, vr4, vr1
+    vilvl.w         vr0, vr1, vr0
+
+    movfr2gr.d      t0, fa0
+    and             t0, t0, t3
+    bgeu            t2, a2, L(end)
+    bnez            t0, L(found)
+
+    addi.d          a4, a3, -1
+    bstrins.d       a4, zero, 5, 0
+L(loop):
+    xvld            xr0, a0, 64
+    xvld            xr1, a0, 96
+
+    addi.d          a0, a0, 64
+    xvseq.b         xr0, xr0, xr2
+    xvseq.b         xr1, xr1, xr2
+    beq             a0, a4, L(out)
+
+
+    xvmax.bu        xr3, xr0, xr1
+    xvseteqz.v      fcc0, xr3
+    bcnez           fcc0, L(loop)
+    xvmsknz.b       xr0, xr0
+
+    xvmsknz.b       xr1, xr1
+    xvpickve.w      xr3, xr0, 4
+    xvpickve.w      xr4, xr1, 4
+    vilvl.h         vr0, vr3, vr0
+
+    vilvl.h         vr1, vr4, vr1
+    vilvl.w         vr0, vr1, vr0
+    movfr2gr.d      t0, fa0
+L(found):
+    ctz.d           t1, t0
+
+    add.d           a0, a0, t1
+    jr              ra
+L(ret0):
+    move            a0, zero
+    jr              ra
+
+
+L(out):
+    xvmsknz.b       xr0, xr0
+    xvmsknz.b       xr1, xr1
+    xvpickve.w      xr3, xr0, 4
+    xvpickve.w      xr4, xr1, 4
+
+    vilvl.h         vr0, vr3, vr0
+    vilvl.h         vr1, vr4, vr1
+    vilvl.w         vr0, vr1, vr0
+    movfr2gr.d      t0, fa0
+
+L(end):
+    sub.d           t2, zero, a3
+    srl.d           t1, t1, t2
+    and             t0, t0, t1
+    ctz.d           t1, t0
+
+    add.d           a0, a0, t1
+    maskeqz         a0, a0, t0
+    jr              ra
+END(MEMCHR)
+
+libc_hidden_builtin_def (MEMCHR)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S
new file mode 100644
index 0000000000..a73ecd2599
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S
@@ -0,0 +1,102 @@ 
+/* Optimized memchr implementation using LoongArch LSX instructions.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define MEMCHR __memchr_lsx
+
+LEAF(MEMCHR, 6)
+    beqz            a2, L(ret0)
+    add.d           a3, a0, a2
+    andi            t0, a0, 0x1f
+    bstrins.d       a0, zero, 4, 0
+
+    vld             vr0, a0, 0
+    vld             vr1, a0, 16
+    li.d            t1, -1
+    li.d            t2, 32
+
+    vreplgr2vr.b    vr2, a1
+    sll.d           t3, t1, t0
+    sub.d           t2, t2, t0
+    vseq.b          vr0, vr0, vr2
+
+    vseq.b          vr1, vr1, vr2
+    vmsknz.b        vr0, vr0
+    vmsknz.b        vr1, vr1
+    vilvl.h         vr0, vr1, vr0
+
+
+    movfr2gr.s      t0, fa0
+    and             t0, t0, t3
+    bgeu            t2, a2, L(end)
+    bnez            t0, L(found)
+
+    addi.d          a4, a3, -1
+    bstrins.d       a4, zero, 4, 0
+L(loop):
+    vld             vr0, a0, 32
+    vld             vr1, a0, 48
+
+    addi.d          a0, a0, 32
+    vseq.b          vr0, vr0, vr2
+    vseq.b          vr1, vr1, vr2
+    beq             a0, a4, L(out)
+
+    vmax.bu         vr3, vr0, vr1
+    vseteqz.v       fcc0, vr3
+    bcnez           fcc0, L(loop)
+    vmsknz.b        vr0, vr0
+
+
+    vmsknz.b        vr1, vr1
+    vilvl.h         vr0, vr1, vr0
+    movfr2gr.s      t0, fa0
+L(found):
+    ctz.w           t0, t0
+
+    add.d           a0, a0, t0
+    jr              ra
+L(ret0):
+    move            a0, zero
+    jr              ra
+
+L(out):
+    vmsknz.b        vr0, vr0
+    vmsknz.b        vr1, vr1
+    vilvl.h         vr0, vr1, vr0
+    movfr2gr.s      t0, fa0
+
+L(end):
+    sub.d           t2, zero, a3
+    srl.w           t1, t1, t2
+    and             t0, t0, t1
+    ctz.w           t1, t0
+
+
+    add.d           a0, a0, t1
+    maskeqz         a0, a0, t0
+    jr              ra
+END(MEMCHR)
+
+libc_hidden_builtin_def (MEMCHR)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/memchr.c b/sysdeps/loongarch/lp64/multiarch/memchr.c
new file mode 100644
index 0000000000..059479c0ce
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/memchr.c
@@ -0,0 +1,37 @@ 
+/* Multiple versions of memchr.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define memchr __redirect_memchr
+# include <string.h>
+# undef memchr
+
+# define SYMBOL_NAME memchr
+# include "ifunc-memchr.h"
+
+libc_ifunc_redirected (__redirect_memchr, memchr,
+		       IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr)
+  __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memchr);
+# endif
+
+#endif