[AArch64,v2,07/11] aarch64: Add out-of-line functions for LSE atomics

Message ID 20181002161915.18843-8-richard.henderson@linaro.org
State New
Series LSE atomics out-of-line

Commit Message

Richard Henderson Oct. 2, 2018, 4:19 p.m. UTC
This is the libgcc part of the interface -- providing the functions.
Rationale is provided at the top of libgcc/config/aarch64/lse.c.
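
Concretely, each specialization is an ordinary function with hidden
visibility.  A hypothetical caller-side sketch (the names follow the
NAME() macro in lse.c; the wrapper merely illustrates the kind of call
the compiler side of this series would emit):

/* Hidden per-DSO flag, set by the L_have_atomics constructor.  */
extern _Bool __aa64_have_atomics;

/* Base "swp" with SIZE = 8 and MODEL = 4 (_acq_rel) names this entry.  */
extern unsigned long long __aa64_swp8_acq_rel (unsigned long long new,
                                               unsigned long long *ptr);

unsigned long long
exchange_u64 (unsigned long long *p, unsigned long long v)
{
  /* Stands in for __atomic_exchange_n (p, v, __ATOMIC_ACQ_REL).  */
  return __aa64_swp8_acq_rel (v, p);
}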

	* config/aarch64/lse.c: New file.
	* config/aarch64/t-lse: New file.
	* config.host: Add t-lse to all aarch64 tuples.
---
 libgcc/config/aarch64/lse.c | 260 ++++++++++++++++++++++++++++++++++++
 libgcc/config.host          |   4 +
 libgcc/config/aarch64/t-lse |  44 ++++++
 3 files changed, 308 insertions(+)
 create mode 100644 libgcc/config/aarch64/lse.c
 create mode 100644 libgcc/config/aarch64/t-lse
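
For a sense of the contract these helpers implement, a hypothetical
self-contained test (the symbol name follows lse.c's NAME() scheme;
the load-and-operate entry points return the value memory held before
the operation):

extern unsigned int __aa64_ldadd4_relax (unsigned int val,
                                         unsigned int *ptr);

int
main (void)
{
  unsigned int counter = 5;
  unsigned int old = __aa64_ldadd4_relax (3, &counter);
  /* Atomic fetch-and-add: returns 5 and leaves counter == 8.  */
  return !(old == 5 && counter == 8);
}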

Patch

diff --git a/libgcc/config/aarch64/lse.c b/libgcc/config/aarch64/lse.c
new file mode 100644
index 00000000000..68ca7df667b
--- /dev/null
+++ b/libgcc/config/aarch64/lse.c
@@ -0,0 +1,260 @@ 
+/* Out-of-line LSE atomics for AArch64 architecture.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   Contributed by Linaro Ltd.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/*
+ * The problem that we are trying to solve is operating system deployment
+ * of ARMv8.1-Atomics, also known as Large System Extensions (LSE).
+ *
+ * There are a number of potential solutions for this problem which have
+ * been proposed and rejected for various reasons.  To recap:
+ *
+ * (1) Multiple builds.  The dynamic linker will examine /lib64/atomics/
+ * if HWCAP_ATOMICS is set, allowing entire libraries to be overridden.
+ * However, not all Linux distributions are happy with multiple builds,
+ * and in any case it has no effect on main applications.
+ *
+ * (2) IFUNC.  We could put these functions into libgcc_s.so, and have
+ * a single copy of each function for all DSOs.  However, ARM is concerned
+ * that the branch-to-indirect-branch that is implied by using a PLT,
+ * as required by IFUNC, is too much overhead for smaller cpus.
+ *
+ * (3) Statically predicted direct branches.  This is the approach that
+ * is taken here.  These functions are linked into every DSO that uses them.
+ * All of the symbols are hidden, so that the functions are called via a
+ * direct branch.  The choice of LSE vs non-LSE is made via a one-byte
+ * load followed by a well-predicted direct branch.  The functions are
+ * compiled separately to minimize code size.
+ */
+
+/* Define or declare the symbol gating the LSE implementations.  */
+#ifndef L_have_atomics
+extern
+#endif
+_Bool __aa64_have_atomics __attribute__((visibility("hidden"), nocommon));
+
+/* The branch controlled by this test should be easily predicted, in that
+   it will, after constructors, always branch the same way.  The expectation
+   is that systems that implement ARMv8.1-Atomics are "beefier" than those
+   that omit the extension.  By arranging for the fall-through path to use
+   load-store-exclusive insns, we aid the branch predictor of the
+   smallest cpus.  */
+#define have_atomics  __builtin_expect (__aa64_have_atomics, 0)
+
+#ifdef L_have_atomics
+/* Disable initialization of __aa64_have_atomics during bootstrap.  */
+# ifndef inhibit_libc
+#  include <sys/auxv.h>
+/* Disable initialization if the system headers are too old.  */
+#  if defined(AT_HWCAP) && defined(HWCAP_ATOMICS)
+static void __attribute__((constructor))
+init_have_atomics (void)
+{
+  unsigned long hwcap = getauxval (AT_HWCAP);
+  __aa64_have_atomics = (hwcap & HWCAP_ATOMICS) != 0;
+}
+#  endif /* HWCAP */
+# endif /* inhibit_libc */
+#else
+
+/* Tell the assembler to accept LSE instructions.  */
+asm(".arch armv8-a+lse");
+
+/* Turn size and memory model defines into mnemonic fragments.  */
+#if SIZE == 1
+# define S     "b"
+# define MASK  ", uxtb"
+#elif SIZE == 2
+# define S     "h"
+# define MASK  ", uxth"
+#elif SIZE == 4 || SIZE == 8
+# define S     ""
+# define MASK  ""
+#else
+# error "unsupported SIZE"
+#endif
+
+#if SIZE < 8
+# define T  unsigned int
+# define W  "w"
+#else
+# define T  unsigned long long
+# define W  ""
+#endif
+
+#if MODEL == 1
+# define SUFF  _relax
+# define A     ""
+# define L     ""
+#elif MODEL == 2
+# define SUFF  _acq
+# define A     "a"
+# define L     ""
+#elif MODEL == 3
+# define SUFF  _rel
+# define A     ""
+# define L     "l"
+#elif MODEL == 4
+# define SUFF  _acq_rel
+# define A     "a"
+# define L     "l"
+#else
+# error "unsupported MODEL"
+#endif
+
+#define NAME2(B, S, X)  __aa64_ ## B ## S ## X
+#define NAME1(B, S, X)  NAME2(B, S, X)
+#define NAME(BASE)	NAME1(BASE, SIZE, SUFF)
+
+#define str1(S)  #S
+#define str(S)   str1(S)
+
+#ifdef L_cas
+T NAME(cas) (T cmp, T new, T *ptr) __attribute__((visibility("hidden")));
+T NAME(cas) (T cmp, T new, T *ptr)
+{
+  T old;
+  unsigned tmp;
+
+  if (have_atomics)
+    __asm__("cas" A L S " %"W"0, %"W"2, %1"
+            : "=r"(old), "+m"(*ptr) : "r"(new), "0"(cmp));
+  else
+    __asm__(
+	"0: "
+	"ld" A "xr"S" %"W"0, %1\n\t"
+	"cmp %"W"0, %"W"4" MASK "\n\t"
+	"bne 1f\n\t"
+	"st" L "xr"S" %w2, %"W"3, %1\n\t"
+	"cbnz %w2, 0b\n"
+	"1:"
+	: "=&r"(old), "+m"(*ptr), "=&r"(tmp) : "r"(new), "r"(cmp));
+
+  return old;
+}
+#endif
+
+#ifdef L_swp
+T NAME(swp) (T new, T *ptr) __attribute__((visibility("hidden")));
+T NAME(swp) (T new, T *ptr)
+{
+  T old;
+  unsigned tmp;
+
+  if (have_atomics)
+    __asm__("swp" A L S " %"W"2, %"W"0, %1"
+            : "=r"(old), "+m"(*ptr) : "r"(new));
+  else
+    __asm__(
+	"0: "
+	"ld" A "xr"S" %"W"0, %1\n\t"
+	"st" L "xr"S" %w2, %"W"3, %1\n\t"
+	"cbnz %w2, 0b\n"
+	"1:"
+	: "=&r"(old), "+m"(*ptr), "=&r"(tmp) : "r"(new));
+
+  return old;
+}
+#endif
+
+#if defined(L_ldadd) || defined(L_ldclr) \
+    || defined(L_ldeor) || defined(L_ldset)
+
+#ifdef L_ldadd
+#define LDOP  ldadd
+#define OP    add
+#elif defined(L_ldclr)
+#define LDOP  ldclr
+#define OP    bic
+#elif defined(L_ldeor)
+#define LDOP  ldeor
+#define OP    eor
+#elif defined(L_ldset)
+#define LDOP  ldset
+#define OP    orr
+#else
+#error "unsupported load operation"
+#endif
+
+T NAME(LDOP) (T val, T *ptr) __attribute__((visibility("hidden")));
+T NAME(LDOP) (T val, T *ptr)
+{
+  T old;
+  unsigned tmp;
+
+  if (have_atomics)
+    __asm__(str(LDOP) A L S " %"W"2, %"W"0, %1"
+            : "=r"(old), "+m"(*ptr) : "r"(val));
+  else
+    __asm__(
+	"0: "
+	"ld" A "xr"S" %"W"0, %1\n\t"
+	str(OP) " %"W"2, %"W"0, %"W"3\n\t"
+	"st" L "xr"S" %w2, %"W"2, %1\n\t"
+	"cbnz %w2, 0b\n"
+	"1:"
+	: "=&r"(old), "+m"(*ptr), "=&r"(tmp) : "r"(val));
+
+  return old;
+}
+#endif
+
+#if defined(L_stadd) || defined(L_stclr) \
+    || defined(L_steor) || defined(L_stset)
+
+#ifdef L_stadd
+#define STOP  stadd
+#define OP    add
+#elif defined(L_stclr)
+#define STOP  stclr
+#define OP    bic
+#elif defined(L_steor)
+#define STOP  steor
+#define OP    eor
+#elif defined(L_stset)
+#define STOP  stset
+#define OP    orr
+#else
+#error "unsupported store operation"
+#endif
+
+void NAME(STOP) (T val, T *ptr) __attribute__((visibility("hidden")));
+void NAME(STOP) (T val, T *ptr)
+{
+  unsigned tmp;
+
+  if (have_atomics)
+    __asm__(str(STOP) L S " %"W"1, %0" : "+m"(*ptr) : "r"(val));
+  else
+    __asm__(
+	"0: "
+	"ldxr"S" %"W"1, %0\n\t"
+	str(OP) " %"W"1, %"W"1, %"W"2\n\t"
+	"st" L "xr"S" %w1, %"W"1, %0\n\t"
+	"cbnz %w1, 0b\n"
+	"1:"
+	: "+m"(*ptr), "=&r"(tmp) : "r"(val));
+}
+#endif
+#endif /* L_have_atomics */
diff --git a/libgcc/config.host b/libgcc/config.host
index 029f6569caf..7e9a8b6bc8f 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -340,23 +340,27 @@  aarch64*-*-elf | aarch64*-*-rtems*)
 	extra_parts="$extra_parts crtbegin.o crtend.o crti.o crtn.o"
 	extra_parts="$extra_parts crtfastmath.o"
 	tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
+	tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"
 	tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
 	md_unwind_header=aarch64/aarch64-unwind.h
 	;;
 aarch64*-*-freebsd*)
 	extra_parts="$extra_parts crtfastmath.o"
 	tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
+	tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"
 	tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
 	md_unwind_header=aarch64/freebsd-unwind.h
 	;;
 aarch64*-*-fuchsia*)
 	tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
+	tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"
 	tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp"
 	;;
 aarch64*-*-linux*)
 	extra_parts="$extra_parts crtfastmath.o"
 	md_unwind_header=aarch64/linux-unwind.h
 	tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
+	tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"
 	tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
 	;;
 alpha*-*-linux*)
diff --git a/libgcc/config/aarch64/t-lse b/libgcc/config/aarch64/t-lse
new file mode 100644
index 00000000000..e862b0c2448
--- /dev/null
+++ b/libgcc/config/aarch64/t-lse
@@ -0,0 +1,44 @@ 
+# Out-of-line LSE atomics for AArch64 architecture.
+# Copyright (C) 2018 Free Software Foundation, Inc.
+# Contributed by Linaro Ltd.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# CAS, Swap, and Load-and-operate have 4 sizes and 4 memory models.
+S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), cas swp ldadd ldclr ldeor ldset))
+O1 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S1)))
+
+# Store-and-operate has 4 sizes but only 2 memory models (relaxed, release).
+S2 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), stadd stclr steor stset))
+O2 := $(foreach m, 1 3, $(addsuffix _$(m)$(objext), $(S2)))
+
+LSE_OBJS := $(O1) $(O2)
+
+libgcc-objects += $(LSE_OBJS) have_atomic$(objext)
+
+empty      =
+space      = $(empty) $(empty)
+PAT_SPLIT  = $(subst _,$(space),$(*F))
+PAT_BASE   = $(word 1,$(PAT_SPLIT))
+PAT_N      = $(word 2,$(PAT_SPLIT))
+PAT_M      = $(word 3,$(PAT_SPLIT))
+
+have_atomic$(objext): $(srcdir)/config/aarch64/lse.c
+	$(gcc_compile) -DL_have_atomics -c $<
+
+$(LSE_OBJS): $(srcdir)/config/aarch64/lse.c
+	$(gcc_compile) -DL_$(PAT_BASE) -DSIZE=$(PAT_N) -DMODEL=$(PAT_M) -c $<
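
To make the pattern rule concrete, consider one hypothetical cell of the
matrix: O1 with s=2 and m=3 yields the object cas_2_3$(objext), so the
rule invokes $(gcc_compile) -DL_cas -DSIZE=2 -DMODEL=3 -c lse.c.  A
sketch of what those defines select inside lse.c:

/* SIZE == 2   ->  S = "h", MASK = ", uxth"       (halfword operand)
   SIZE < 8    ->  T = unsigned int, W = "w"      (32-bit registers)
   MODEL == 3  ->  SUFF = _rel, A = "", L = "l"   (release ordering)
   NAME(cas) therefore expands to __aa64_cas2_rel, and the LSE path
   assembles "caslh %w0, %w2, %1".  The resulting entry point is:  */
unsigned int __aa64_cas2_rel (unsigned int cmp, unsigned int new,
                              unsigned int *ptr)
	__attribute__((visibility("hidden")));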