diff mbox series

[v2,4/4] Libatomic: Clean up AArch64 `atomic_16.S' implementation file

Message ID 20240611124250.1390575-5-victor.donascimento@arm.com
State New
Headers show
Series Libatomic: Cleanup ifunc selector and aliasing | expand

Commit Message

Victor Do Nascimento June 11, 2024, 12:42 p.m. UTC
At present, `atomic_16.S' groups the different implementations of the
same function together in the file.  For example, the LSE2
implementation of `load_16' immediately follows its core
implementation, as does the LSE2 implementation of `store_16'.

These architecture-extension-dependent implementations require ifunc
support and are therefore guarded by the relevant preprocessor check,
i.e.  `#if HAVE_IFUNC'.

Having to apply these guards on a per-function basis adds unnecessary
clutter to the file and makes its maintenance more error-prone.

We therefore reorganize the layout of the file in such a way that all
core implementations needing no `#ifdef's are placed first, followed
by all ifunc-dependent implementations, which can all be guarded by a
single `#if HAVE_IFUNC', greatly reducing the overall number of
required `#ifdef' macros.

libatomic/ChangeLog:

	* config/linux/aarch64/atomic_16.S: Reorganize functions in
	file.
	(HAVE_FEAT_LSE2): Delete.
---
 libatomic/config/linux/aarch64/atomic_16.S | 445 +++++++++++----------
 1 file changed, 223 insertions(+), 222 deletions(-)
diff mbox series

Patch

diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S
index 11a296dacc3..c44c31c6418 100644
--- a/libatomic/config/linux/aarch64/atomic_16.S
+++ b/libatomic/config/linux/aarch64/atomic_16.S
@@ -40,8 +40,6 @@ 
 
 #include "auto-config.h"
 
-#define HAVE_FEAT_LSE2	HAVE_IFUNC
-
 	.arch	armv8-a+lse
 
 #define LSE128(NAME)	libat_##NAME##_i1
@@ -116,6 +114,9 @@  NAME:				\
 #define SEQ_CST 5
 
 
+/* Core implementations: Not dependent on the presence of further architectural
+   extensions.  */
+
 ENTRY (load_16)
 	mov	x5, x0
 	cbnz	w1, 2f
@@ -134,31 +135,6 @@  ENTRY (load_16)
 END (load_16)
 
 
-#if HAVE_FEAT_LSE2
-ENTRY_FEAT (load_16, LSE2)
-	cbnz	w1, 1f
-
-	/* RELAXED.  */
-	ldp	res0, res1, [x0]
-	ret
-1:
-	cmp	w1, SEQ_CST
-	b.eq	2f
-
-	/* ACQUIRE/CONSUME (Load-AcquirePC semantics).  */
-	ldp	res0, res1, [x0]
-	dmb	ishld
-	ret
-
-	/* SEQ_CST.  */
-2:	ldar	tmp0, [x0]	/* Block reordering with Store-Release instr.  */
-	ldp	res0, res1, [x0]
-	dmb	ishld
-	ret
-END_FEAT (load_16, LSE2)
-#endif
-
-
 ENTRY (store_16)
 	cbnz	w4, 2f
 
@@ -176,23 +152,6 @@  ENTRY (store_16)
 END (store_16)
 
 
-#if HAVE_FEAT_LSE2
-ENTRY_FEAT (store_16, LSE2)
-	cbnz	w4, 1f
-
-	/* RELAXED.  */
-	stp	in0, in1, [x0]
-	ret
-
-	/* RELEASE/SEQ_CST.  */
-1:	ldxp	xzr, tmp0, [x0]
-	stlxp	w4, in0, in1, [x0]
-	cbnz	w4, 1b
-	ret
-END_FEAT (store_16, LSE2)
-#endif
-
-
 ENTRY (exchange_16)
 	mov	x5, x0
 	cbnz	w4, 2f
@@ -220,32 +179,6 @@  ENTRY (exchange_16)
 END (exchange_16)
 
 
-ENTRY_FEAT (exchange_16, LSE128)
-	mov	tmp0, x0
-	mov	res0, in0
-	mov	res1, in1
-	cbnz	w4, 1f
-
-	/* RELAXED.  */
-	/* swpp res0, res1, [tmp0]  */
-	.inst	0x192180c0
-	ret
-1:
-	cmp	w4, ACQUIRE
-	b.hi	2f
-
-	/* ACQUIRE/CONSUME.  */
-	/* swppa res0, res1, [tmp0]  */
-	.inst	0x19a180c0
-	ret
-
-	/* RELEASE/ACQ_REL/SEQ_CST.  */
-2:	/* swppal res0, res1, [tmp0]  */
-	.inst	0x19e180c0
-	ret
-END_FEAT (exchange_16, LSE128)
-
-
 ENTRY (compare_exchange_16)
 	ldp	exp0, exp1, [x1]
 	cbz	w4, 3f
@@ -293,42 +226,6 @@  ENTRY (compare_exchange_16)
 END (compare_exchange_16)
 
 
-#if HAVE_FEAT_LSE2
-ENTRY_FEAT (compare_exchange_16, LSE)
-	ldp	exp0, exp1, [x1]
-	mov	tmp0, exp0
-	mov	tmp1, exp1
-	cbz	w4, 2f
-	cmp	w4, RELEASE
-	b.hs	3f
-
-	/* ACQUIRE/CONSUME.  */
-	caspa	exp0, exp1, in0, in1, [x0]
-0:
-	cmp	exp0, tmp0
-	ccmp	exp1, tmp1, 0, eq
-	bne	1f
-	mov	x0, 1
-	ret
-1:
-	stp	exp0, exp1, [x1]
-	mov	x0, 0
-	ret
-
-	/* RELAXED.  */
-2:	casp	exp0, exp1, in0, in1, [x0]
-	b	0b
-
-	/* RELEASE.  */
-3:	b.hi	4f
-	caspl	exp0, exp1, in0, in1, [x0]
-	b	0b
-
-	/* ACQ_REL/SEQ_CST.  */
-4:	caspal	exp0, exp1, in0, in1, [x0]
-	b	0b
-END_FEAT (compare_exchange_16, LSE)
-#endif
 
 
 ENTRY_ALIASED (fetch_add_16)
@@ -441,32 +338,6 @@  ENTRY (fetch_or_16)
 END (fetch_or_16)
 
 
-ENTRY_FEAT (fetch_or_16, LSE128)
-	mov	tmp0, x0
-	mov	res0, in0
-	mov	res1, in1
-	cbnz	w4, 1f
-
-	/* RELAXED.  */
-	/* ldsetp res0, res1, [tmp0]  */
-	.inst	0x192130c0
-	ret
-1:
-	cmp	w4, ACQUIRE
-	b.hi	2f
-
-	/* ACQUIRE/CONSUME.  */
-	/* ldsetpa res0, res1, [tmp0]  */
-	.inst	0x19a130c0
-	ret
-
-	/* RELEASE/ACQ_REL/SEQ_CST.  */
-2:	/* ldsetpal res0, res1, [tmp0]  */
-	.inst	0x19e130c0
-	ret
-END_FEAT (fetch_or_16, LSE128)
-
-
 ENTRY (or_fetch_16)
 	mov	x5, x0
 	cbnz	w4, 2f
@@ -489,37 +360,6 @@  ENTRY (or_fetch_16)
 END (or_fetch_16)
 
 
-ENTRY_FEAT (or_fetch_16, LSE128)
-	cbnz	w4, 1f
-	mov	tmp0, in0
-	mov	tmp1, in1
-
-	/* RELAXED.  */
-	/* ldsetp in0, in1, [x0]  */
-	.inst	0x19233002
-	orr	res0, in0, tmp0
-	orr	res1, in1, tmp1
-	ret
-1:
-	cmp	w4, ACQUIRE
-	b.hi	2f
-
-	/* ACQUIRE/CONSUME.  */
-	/* ldsetpa in0, in1, [x0]  */
-	.inst	0x19a33002
-	orr	res0, in0, tmp0
-	orr	res1, in1, tmp1
-	ret
-
-	/* RELEASE/ACQ_REL/SEQ_CST.  */
-2:	/* ldsetpal in0, in1, [x0]  */
-	.inst	0x19e33002
-	orr	res0, in0, tmp0
-	orr	res1, in1, tmp1
-	ret
-END_FEAT (or_fetch_16, LSE128)
-
-
 ENTRY (fetch_and_16)
 	mov	x5, x0
 	cbnz	w4, 2f
@@ -542,33 +382,6 @@  ENTRY (fetch_and_16)
 END (fetch_and_16)
 
 
-ENTRY_FEAT (fetch_and_16, LSE128)
-	mov	tmp0, x0
-	mvn	res0, in0
-	mvn	res1, in1
-	cbnz	w4, 1f
-
-	/* RELAXED.  */
-	/* ldclrp res0, res1, [tmp0]  */
-	.inst	0x192110c0
-	ret
-
-1:
-	cmp	w4, ACQUIRE
-	b.hi	2f
-
-	/* ACQUIRE/CONSUME.  */
-	/* ldclrpa res0, res1, [tmp0]  */
-	.inst	0x19a110c0
-	ret
-
-	/* RELEASE/ACQ_REL/SEQ_CST.  */
-2:	/* ldclrpal res0, res1, [tmp0]  */
-	.inst	0x19e110c0
-	ret
-END_FEAT (fetch_and_16, LSE128)
-
-
 ENTRY (and_fetch_16)
 	mov	x5, x0
 	cbnz	w4, 2f
@@ -591,38 +404,6 @@  ENTRY (and_fetch_16)
 END (and_fetch_16)
 
 
-ENTRY_FEAT (and_fetch_16, LSE128)
-	mvn	tmp0, in0
-	mvn	tmp0, in1
-	cbnz	w4, 1f
-
-	/* RELAXED.  */
-	/* ldclrp tmp0, tmp1, [x0]  */
-	.inst	0x19271006
-	and	res0, tmp0, in0
-	and	res1, tmp1, in1
-	ret
-
-1:
-	cmp	w4, ACQUIRE
-	b.hi	2f
-
-	/* ACQUIRE/CONSUME.  */
-	/* ldclrpa tmp0, tmp1, [x0]  */
-	.inst	0x19a71006
-	and	res0, tmp0, in0
-	and	res1, tmp1, in1
-	ret
-
-	/* RELEASE/ACQ_REL/SEQ_CST.  */
-2:	/* ldclrpal	tmp0, tmp1, [x5]  */
-	.inst	0x19e710a6
-	and	res0, tmp0, in0
-	and	res1, tmp1, in1
-	ret
-END_FEAT (and_fetch_16, LSE128)
-
-
 ENTRY_ALIASED (fetch_xor_16)
 	mov	x5, x0
 	cbnz	w4, 2f
@@ -728,6 +509,226 @@  ENTRY_ALIASED (test_and_set_16)
 END (test_and_set_16)
 
 
+#if HAVE_IFUNC
+/* ifunc implementations: Carry a run-time dependence on the presence of
+   further architectural extensions.  */
+
+ENTRY_FEAT (exchange_16, LSE128)	/* 16-byte atomic exchange using SWPP; w4 holds the memory order.  */
+	mov	tmp0, x0	/* tmp0 = address; it is the base register of every SWPP below.  */
+	mov	res0, in0	/* res0:res1 start as the new value; SWPP swaps the pair with memory,  */
+	mov	res1, in1	/* leaving the previous contents in res0:res1 as the result.  */
+	cbnz	w4, 1f	/* Non-zero order: pick an ordered variant at 1.  */
+
+	/* RELAXED.  */
+	/* swpp res0, res1, [tmp0]  */
+	.inst	0x192180c0	/* Raw encoding of the instruction named above.  */
+	ret
+1:
+	cmp	w4, ACQUIRE
+	b.hi	2f	/* Orders above ACQUIRE take the acquire+release form at 2.  */
+
+	/* ACQUIRE/CONSUME.  */
+	/* swppa res0, res1, [tmp0]  */
+	.inst	0x19a180c0
+	ret
+
+	/* RELEASE/ACQ_REL/SEQ_CST.  */
+2:	/* swppal res0, res1, [tmp0]  */
+	.inst	0x19e180c0
+	ret
+END_FEAT (exchange_16, LSE128)
+
+
+ENTRY_FEAT (fetch_or_16, LSE128)	/* 16-byte atomic OR returning the old value; w4 holds the memory order.  */
+	mov	tmp0, x0	/* tmp0 = address; it is the base register of every LDSETP below.  */
+	mov	res0, in0	/* res0:res1 carry the bits to set on input and are overwritten by  */
+	mov	res1, in1	/* LDSETP with the previous memory contents, which is the result.  */
+	cbnz	w4, 1f	/* Non-zero order: pick an ordered variant at 1.  */
+
+	/* RELAXED.  */
+	/* ldsetp res0, res1, [tmp0]  */
+	.inst	0x192130c0	/* Raw encoding of the instruction named above.  */
+	ret
+1:
+	cmp	w4, ACQUIRE
+	b.hi	2f	/* Orders above ACQUIRE take the acquire+release form at 2.  */
+
+	/* ACQUIRE/CONSUME.  */
+	/* ldsetpa res0, res1, [tmp0]  */
+	.inst	0x19a130c0
+	ret
+
+	/* RELEASE/ACQ_REL/SEQ_CST.  */
+2:	/* ldsetpal res0, res1, [tmp0]  */
+	.inst	0x19e130c0
+	ret
+END_FEAT (fetch_or_16, LSE128)
+
+
+ENTRY_FEAT (or_fetch_16, LSE128)	/* 16-byte atomic OR returning the new value; w4 holds the memory order.  */
+	mov	tmp0, in0	/* Save the operand before branching: LDSETP replaces in0:in1 with  */
+	mov	tmp1, in1	/* the old memory value, and all three paths OR it with tmp0:tmp1.  */
+	cbnz	w4, 1f
+
+	/* RELAXED.  */
+	/* ldsetp in0, in1, [x0]  */
+	.inst	0x19233002
+	orr	res0, in0, tmp0	/* New value = old value | operand.  */
+	orr	res1, in1, tmp1
+	ret
+1:
+	cmp	w4, ACQUIRE
+	b.hi	2f	/* Orders above ACQUIRE take the acquire+release form at 2.  */
+
+	/* ACQUIRE/CONSUME.  */
+	/* ldsetpa in0, in1, [x0]  */
+	.inst	0x19a33002
+	orr	res0, in0, tmp0
+	orr	res1, in1, tmp1
+	ret
+
+	/* RELEASE/ACQ_REL/SEQ_CST.  */
+2:	/* ldsetpal in0, in1, [x0]  */
+	.inst	0x19e33002
+	orr	res0, in0, tmp0
+	orr	res1, in1, tmp1
+	ret
+END_FEAT (or_fetch_16, LSE128)
+
+
+ENTRY_FEAT (fetch_and_16, LSE128)	/* 16-byte atomic AND returning the old value; w4 holds the memory order.  */
+	mov	tmp0, x0	/* tmp0 = address; it is the base register of every LDCLRP below.  */
+	mvn	res0, in0	/* LDCLRP clears the bits that are set in its operand, so the AND  */
+	mvn	res1, in1	/* mask is passed inverted; the old value comes back in res0:res1.  */
+	cbnz	w4, 1f	/* Non-zero order: pick an ordered variant at 1.  */
+
+	/* RELAXED.  */
+	/* ldclrp res0, res1, [tmp0]  */
+	.inst	0x192110c0	/* Raw encoding of the instruction named above.  */
+	ret
+
+1:
+	cmp	w4, ACQUIRE
+	b.hi	2f	/* Orders above ACQUIRE take the acquire+release form at 2.  */
+
+	/* ACQUIRE/CONSUME.  */
+	/* ldclrpa res0, res1, [tmp0]  */
+	.inst	0x19a110c0
+	ret
+
+	/* RELEASE/ACQ_REL/SEQ_CST.  */
+2:	/* ldclrpal res0, res1, [tmp0]  */
+	.inst	0x19e110c0
+	ret
+END_FEAT (fetch_and_16, LSE128)
+
+
+ENTRY_FEAT (and_fetch_16, LSE128)	/* 16-byte atomic AND returning the new value; w4 holds the memory order.  */
+	mvn	tmp0, in0	/* LDCLRP clears the bits set in its operand, so pass ~in0:~in1.  */
+	mvn	tmp1, in1	/* High half goes in tmp1; tmp0 must keep ~in0.  */
+	cbnz	w4, 1f
+
+	/* RELAXED.  */
+	/* ldclrp tmp0, tmp1, [x0]  */
+	.inst	0x19271006
+	and	res0, tmp0, in0	/* tmp0:tmp1 now hold the old value; new value = old & in.  */
+	and	res1, tmp1, in1
+	ret
+
+1:
+	cmp	w4, ACQUIRE
+	b.hi	2f	/* Orders above ACQUIRE take the acquire+release form at 2.  */
+
+	/* ACQUIRE/CONSUME.  */
+	/* ldclrpa tmp0, tmp1, [x0]  */
+	.inst	0x19a71006
+	and	res0, tmp0, in0
+	and	res1, tmp1, in1
+	ret
+
+	/* RELEASE/ACQ_REL/SEQ_CST.  */
+2:	/* ldclrpal tmp0, tmp1, [x0]  */
+	.inst	0x19e71006	/* Base is x0 as in the other variants; x5 is never set here.  */
+	and	res0, tmp0, in0
+	and	res1, tmp1, in1
+	ret
+END_FEAT (and_fetch_16, LSE128)
+
+
+ENTRY_FEAT (load_16, LSE2)	/* 16-byte atomic load from [x0] using LDP; w1 holds the memory order.  */
+	cbnz	w1, 1f	/* Non-zero order: an ordered sequence is needed.  */
+
+	/* RELAXED.  */
+	ldp	res0, res1, [x0]	/* NOTE(review): presumably relies on LSE2 making an aligned 16-byte LDP atomic - confirm.  */
+	ret
+1:
+	cmp	w1, SEQ_CST
+	b.eq	2f
+
+	/* ACQUIRE/CONSUME (Load-AcquirePC semantics).  */
+	ldp	res0, res1, [x0]
+	dmb	ishld	/* Barrier after the load orders it before later accesses.  */
+	ret
+
+	/* SEQ_CST.  */
+2:	ldar	tmp0, [x0]	/* Block reordering with Store-Release instr.  */
+	ldp	res0, res1, [x0]	/* The LDAR result in tmp0 is not used; this LDP provides the value.  */
+	dmb	ishld
+	ret
+END_FEAT (load_16, LSE2)
+
+
+ENTRY_FEAT (store_16, LSE2)	/* 16-byte atomic store of in0:in1 to [x0]; w4 holds the memory order.  */
+	cbnz	w4, 1f	/* Non-zero order: use the store-release loop at 1.  */
+
+	/* RELAXED.  */
+	stp	in0, in1, [x0]	/* NOTE(review): presumably relies on LSE2 making an aligned 16-byte STP atomic - confirm.  */
+	ret
+
+	/* RELEASE/SEQ_CST.  */
+1:	ldxp	xzr, tmp0, [x0]	/* Exclusive load only to arm the monitor; the loaded data is not used.  */
+	stlxp	w4, in0, in1, [x0]	/* Store-release exclusive; w4 is reused for the status result.  */
+	cbnz	w4, 1b	/* Non-zero status: the store did not happen, so retry.  */
+	ret
+END_FEAT (store_16, LSE2)
+
+
+ENTRY_FEAT (compare_exchange_16, LSE)	/* 16-byte compare-and-swap using CASP; w4 holds the memory order.  */
+	ldp	exp0, exp1, [x1]	/* x1 points at the expected value.  */
+	mov	tmp0, exp0	/* Keep a copy: CASP overwrites exp0:exp1 with the value it read.  */
+	mov	tmp1, exp1
+	cbz	w4, 2f
+	cmp	w4, RELEASE
+	b.hs	3f	/* RELEASE and above are sorted out at 3 using these same flags.  */
+
+	/* ACQUIRE/CONSUME.  */
+	caspa	exp0, exp1, in0, in1, [x0]
+0:
+	cmp	exp0, tmp0	/* Common tail: did memory hold the expected value?  */
+	ccmp	exp1, tmp1, 0, eq	/* Compare the high half only if the low half matched; else force "ne".  */
+	bne	1f
+	mov	x0, 1	/* Success.  */
+	ret
+1:
+	stp	exp0, exp1, [x1]	/* Failure: report the observed value through x1.  */
+	mov	x0, 0
+	ret
+
+	/* RELAXED.  */
+2:	casp	exp0, exp1, in0, in1, [x0]
+	b	0b
+
+	/* RELEASE.  */
+3:	b.hi	4f	/* Flags are still those of the compare against RELEASE.  */
+	caspl	exp0, exp1, in0, in1, [x0]
+	b	0b
+
+	/* ACQ_REL/SEQ_CST.  */
+4:	caspal	exp0, exp1, in0, in1, [x0]
+	b	0b
+END_FEAT (compare_exchange_16, LSE)
+#endif
+
 /* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code.  */
 #define FEATURE_1_AND 0xc0000000
 #define FEATURE_1_BTI 1