@@ -626,7 +626,7 @@ bool aarch64_high_bits_all_ones_p (HOST_WIDE_INT);
struct atomic_ool_names
{
- const char *str[4][4];
+ const char *str[5][4];
};
rtx aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
@@ -1606,10 +1606,33 @@ emit_set_insn (rtx x, rtx y)
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
- machine_mode mode = SELECT_CC_MODE (code, x, y);
- rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
+ machine_mode cmp_mode = GET_MODE (x); /* Mode of the values being compared.  */
+ machine_mode cc_mode;
+ rtx cc_reg;
- emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
+ if (cmp_mode == E_TImode)
+ {
+ gcc_assert (code == NE); /* The TImode path only supports NE.  */
+
+ cc_mode = E_CCmode;
+ cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
+
+ rtx x_lo = operand_subword (x, 0, 0, TImode); /* Word 0 of each operand.  */
+ rtx y_lo = operand_subword (y, 0, 0, TImode);
+ emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x_lo, y_lo)); /* Plain compare of word 0.  */
+
+ rtx x_hi = operand_subword (x, 1, 0, TImode); /* Word 1 of each operand.  */
+ rtx y_hi = operand_subword (y, 1, 0, TImode);
+ emit_insn (gen_ccmpdi (cc_reg, cc_reg, x_hi, y_hi, /* NOTE(review): presumably a conditional compare of word 1,  */
+ gen_rtx_EQ (cc_mode, cc_reg, const0_rtx), /* performed only when word 0 compared equal  */
+ GEN_INT (AARCH64_EQ))); /* -- confirm against the ccmpdi pattern.  */
+ }
+ else
+ {
+ cc_mode = SELECT_CC_MODE (code, x, y); /* All other modes: single compare, as before.  */
+ cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
+ emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x, y));
+ }
return cc_reg;
}
@@ -6689,7 +6712,7 @@ sizetochar (int size)
'S/T/U/V': Print a FP/SIMD register name for a register list.
The register printed is the FP/SIMD register name
of X + 0/1/2/3 for S/T/U/V.
- 'R': Print a scalar FP/SIMD register name + 1.
+ 'R': Print a scalar Integer/FP/SIMD register name + 1.
'X': Print bottom 16 bits of integer constant in hex.
'w/x': Print a general register name or the zero register
(32-bit or 64-bit).
@@ -6881,12 +6904,13 @@ aarch64_print_operand (FILE *f, rtx x, int code)
break;
case 'R':
- if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
- {
- output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
- return;
- }
- asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
+ if (REG_P (x) && FP_REGNUM_P (REGNO (x)))
+ asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1); /* FP/SIMD register: print the following Q register.  */
+ else if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
+ asm_fprintf (f, "x%d", REGNO (x) - R0_REGNUM + 1); /* General register: print the following X register.  */
+ else
+ output_operand_lossage ("incompatible register operand for '%%%c'",
+ code); /* Neither an FP/SIMD nor a general register.  */
break;
case 'X':
@@ -14139,16 +14163,26 @@ static void
aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
rtx mem, rtx model_rtx)
{
- emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx));
+ if (mode == E_TImode) /* TImode is loaded as two DImode halves by one pair insn.  */
+ emit_insn (gen_aarch64_load_exclusive_pair (gen_lowpart (DImode, rval),
+ gen_highpart (DImode, rval), /* NOTE(review): split by low/high part here, but the store path uses operand_subword 0/1 -- confirm they agree on big-endian.  */
+ mem, model_rtx));
+ else
+ emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx)); /* Other modes use the per-mode pattern.  */
}
/* Emit store exclusive. */
static void
aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
- rtx rval, rtx mem, rtx model_rtx)
+ rtx mem, rtx val, rtx model_rtx) /* MEM is the destination, VAL the value stored.  */
{
- emit_insn (gen_aarch64_store_exclusive (mode, bval, rval, mem, model_rtx));
+ if (mode == E_TImode) /* TImode is stored as two DImode words by one pair insn.  */
+ emit_insn (gen_aarch64_store_exclusive_pair
+ (bval, mem, operand_subword (val, 0, 0, TImode), /* Word 0 of VAL.  */
+ operand_subword (val, 1, 0, TImode), model_rtx)); /* Word 1 of VAL.  */
+ else
+ emit_insn (gen_aarch64_store_exclusive (mode, bval, mem, val, model_rtx)); /* Other modes use the per-mode pattern.  */
}
/* Mark the previous jump instruction as unlikely. */
@@ -14160,7 +14194,7 @@ aarch64_emit_unlikely_jump (rtx insn)
add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
}
-/* We store the names of the various atomic helpers in a 4x4 array.
+/* We store the names of the various atomic helpers in a 5x4 array.
Return the libcall function given MODE, MODEL and NAMES. */
rtx
@@ -14184,6 +14218,9 @@ aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
case E_DImode:
mode_idx = 3;
break;
+ case E_TImode:
+ mode_idx = 4;
+ break;
default:
gcc_unreachable ();
}
@@ -14218,9 +14255,11 @@ aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
"__aa64_" #B #N "_rel", \
"__aa64_" #B #N "_acq_rel" }
-#define DEF4(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8)
+#define DEF4(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), \
+ { NULL, NULL, NULL, NULL } /* Fifth row left empty: no 16-byte helper names.  */
+#define DEF5(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), DEF0(B, 16) /* Names for all five sizes.  */
-static const atomic_ool_names aarch64_ool_cas_names = { { DEF4(cas) } };
+static const atomic_ool_names aarch64_ool_cas_names = { { DEF5(cas) } }; /* cas also gets the 16-byte row.  */
const atomic_ool_names aarch64_ool_swp_names = { { DEF4(swp) } };
const atomic_ool_names aarch64_ool_ldadd_names = { { DEF4(ldadd) } };
const atomic_ool_names aarch64_ool_ldset_names = { { DEF4(ldset) } };
@@ -14243,6 +14282,7 @@ const atomic_ool_names aarch64_ool_steor_names = { { DEF4(eor) } };
#undef DEF0
#undef DEF4
+#undef DEF5
/* Expand a compare and swap pattern. */
@@ -89,6 +89,7 @@ asm(".arch armv8-a+lse");
#elif SIZE == 4 || SIZE == 8
# define S ""
# define MASK ""
+#elif SIZE == 16
#else
# error
#endif
@@ -96,9 +97,11 @@ asm(".arch armv8-a+lse");
#if SIZE < 8
# define T unsigned int
# define W "w"
-#else
+#elif SIZE == 8
# define T unsigned long long
# define W ""
+#else
+# define T unsigned __int128
#endif
#if MODEL == 1
@@ -136,19 +139,38 @@ T NAME(cas)(T cmp, T new, T *ptr)
unsigned tmp;
if (have_atomics)
- __asm__("cas" A L S " %"W"0, %"W"2, %1"
- : "=r"(old), "+m"(*ptr) : "r"(new), "0"(cmp));
+ {
+#if SIZE == 16
+ __asm__("casp" A L " %0, %R0, %2, %R2, %1"
+ : "=r"(old), "+m"(*ptr) : "r"(new), "0"(cmp));
+#else
+ __asm__("cas" A L S " %"W"0, %"W"2, %1"
+ : "=r"(old), "+m"(*ptr) : "r"(new), "0"(cmp));
+#endif
+ }
else
- __asm__(
- "0: "
- "ld" A "xr"S" %"W"0, %1\n\t"
- "cmp %"W"0, %"W"4" MASK "\n\t"
- "bne 1f\n\t"
- "st" L "xr"S" %w2, %"W"3, %1\n\t"
- "cbnz %w2, 0b\n"
- "1:"
- : "=&r"(old), "+m"(*ptr), "=&r"(tmp) : "r"(new), "r"(cmp));
-
+ {
+#if SIZE == 16
+ __asm__("0: "
+ "ld" A "xp %0, %R0, %1\n\t"
+ "cmp %0, %4\n\t"
+ "ccmp %R0, %R4, #0, eq\n\t"
+ "bne 1f\n\t"
+ "st" L "xp %w2, %3, %R3, %1\n\t"
+ "cbnz %w2, 0b\n"
+ "1:"
+ : "=&r"(old), "+m"(*ptr), "=&r"(tmp) : "r"(new), "r"(cmp));
+#else
+ __asm__("0: "
+ "ld" A "xr"S" %"W"0, %1\n\t"
+ "cmp %"W"0, %"W"4" MASK "\n\t"
+ "bne 1f\n\t"
+ "st" L "xr"S" %w2, %"W"3, %1\n\t"
+ "cbnz %w2, 0b\n"
+ "1:"
+ : "=&r"(old), "+m"(*ptr), "=&r"(tmp) : "r"(new), "r"(cmp));
+#endif
+ }
return old;
}
#endif
@@ -22,10 +22,10 @@
(define_expand "@atomic_compare_and_swap<mode>"
[(match_operand:SI 0 "register_operand" "") ;; bool out
- (match_operand:ALLI 1 "register_operand" "") ;; val out
- (match_operand:ALLI 2 "aarch64_sync_memory_operand" "") ;; memory
- (match_operand:ALLI 3 "nonmemory_operand" "") ;; expected
- (match_operand:ALLI 4 "aarch64_reg_or_zero" "") ;; desired
+ (match_operand:ALLI_TI 1 "register_operand" "") ;; val out
+ (match_operand:ALLI_TI 2 "aarch64_sync_memory_operand" "") ;; memory
+ (match_operand:ALLI_TI 3 "nonmemory_operand" "") ;; expected
+ (match_operand:ALLI_TI 4 "aarch64_reg_or_zero" "") ;; desired
(match_operand:SI 5 "const_int_operand") ;; is_weak
(match_operand:SI 6 "const_int_operand") ;; mod_s
(match_operand:SI 7 "const_int_operand")] ;; mod_f
@@ -88,6 +88,30 @@
}
)
+(define_insn_and_split "@aarch64_compare_and_swap<mode>" ;; TImode-only variant (JUST_TI)
+ [(set (reg:CC CC_REGNUM) ;; bool out
+ (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
+ (set (match_operand:JUST_TI 0 "register_operand" "=&r") ;; val out
+ (match_operand:JUST_TI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory
+ (set (match_dup 1)
+ (unspec_volatile:JUST_TI
+ [(match_operand:JUST_TI 2 "register_operand" "r") ;; expect
+ (match_operand:JUST_TI 3 "aarch64_reg_or_zero" "rZ") ;; desired
+ (match_operand:SI 4 "const_int_operand") ;; is_weak
+ (match_operand:SI 5 "const_int_operand") ;; mod_s
+ (match_operand:SI 6 "const_int_operand")] ;; mod_f
+ UNSPECV_ATOMIC_CMPSW))
+ (clobber (match_scratch:SI 7 "=&r"))] ;; SImode scratch register
+ ""
+ "#" ;; no direct assembly output; the insn is always split
+ "&& reload_completed" ;; split only once reload has completed
+ [(const_int 0)]
+ {
+ aarch64_split_compare_and_swap (operands); /* The whole expansion is delegated to this helper.  */
+ DONE;
+ }
+)
+
(define_insn "@aarch64_compare_and_swap<mode>_lse"
[(set (match_operand:SI 0 "register_operand" "=r") ;; val out
(zero_extend:SI
@@ -133,6 +157,28 @@
return "casal<atomic_sfx>\t%<w>0, %<w>3, %1";
})
+(define_insn "@aarch64_compare_and_swap<mode>_lse" ;; TImode-only variant (JUST_TI)
+ [(set (match_operand:JUST_TI 0 "register_operand" "=r") ;; val out
+ (match_operand:JUST_TI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory
+ (set (match_dup 1)
+ (unspec_volatile:JUST_TI
+ [(match_operand:JUST_TI 2 "register_operand" "0") ;; expect
+ (match_operand:JUST_TI 3 "register_operand" "r") ;; desired
+ (match_operand:SI 4 "const_int_operand")] ;; mod_s
+ UNSPECV_ATOMIC_CMPSW))]
+ "TARGET_LSE" ;; NOTE(review): CASP needs even/odd register pairs; confirm TImode "r" allocation guarantees that
+{
+ enum memmodel model = memmodel_from_int (INTVAL (operands[4])); /* Operand 4 is the success memory model.  */
+ if (is_mm_relaxed (model))
+ return "casp\t%0, %R0, %3, %R3, %1"; /* %R prints the register following the operand's first register.  */
+ else if (is_mm_acquire (model) || is_mm_consume (model))
+ return "caspa\t%0, %R0, %3, %R3, %1";
+ else if (is_mm_release (model))
+ return "caspl\t%0, %R0, %3, %R3, %1";
+ else
+ return "caspal\t%0, %R0, %3, %R3, %1"; /* Every remaining model uses the combined form.  */
+})
+
(define_expand "atomic_exchange<mode>"
[(match_operand:ALLI 0 "register_operand" "")
(match_operand:ALLI 1 "aarch64_sync_memory_operand" "")
@@ -650,6 +696,24 @@
}
)
+(define_insn "aarch64_load_exclusive_pair"
+ [(set (match_operand:DI 0 "register_operand" "=r") ;; first DI result
+ (unspec_volatile:DI
+ [(match_operand:TI 2 "aarch64_sync_memory_operand" "Q") ;; memory
+ (match_operand:SI 3 "const_int_operand")] ;; memory model
+ UNSPECV_LX))
+ (set (match_operand:DI 1 "register_operand" "=r") ;; second DI result
+ (unspec_volatile:DI [(match_dup 2) (match_dup 3)] UNSPECV_LX))]
+ ""
+ {
+ enum memmodel model = memmodel_from_int (INTVAL (operands[3])); /* Operand 3 is the memory model.  */
+ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model))
+ return "ldxp\t%0, %1, %2"; /* These models use the plain exclusive pair load.  */
+ else
+ return "ldaxp\t%0, %1, %2"; /* Every other model uses ldaxp.  */
+ }
+)
+
(define_insn "@aarch64_store_exclusive<mode>"
[(set (match_operand:SI 0 "register_operand" "=&r")
(unspec_volatile:SI [(const_int 0)] UNSPECV_SX))
@@ -668,6 +732,25 @@
}
)
+(define_insn "aarch64_store_exclusive_pair"
+ [(set (match_operand:SI 0 "register_operand" "=&r") ;; status result
+ (unspec_volatile:SI [(const_int 0)] UNSPECV_SX))
+ (set (match_operand:TI 1 "aarch64_sync_memory_operand" "=Q") ;; memory
+ (unspec_volatile:TI
+ [(match_operand:DI 2 "aarch64_reg_or_zero" "rZ") ;; first DI word to store
+ (match_operand:DI 3 "aarch64_reg_or_zero" "rZ") ;; second DI word to store
+ (match_operand:SI 4 "const_int_operand")] ;; memory model
+ UNSPECV_SX))]
+ ""
+ {
+ enum memmodel model = memmodel_from_int (INTVAL (operands[4])); /* Operand 4 is the model; operand 3 is a data word.  */
+ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model))
+ return "stxp\t%w0, %x2, %x3, %1"; /* These models use the plain exclusive pair store.  */
+ else
+ return "stlxp\t%w0, %x2, %x3, %1"; /* Every other model uses stlxp.  */
+ }
+)
+
(define_expand "mem_thread_fence"
[(match_operand:SI 0 "const_int_operand" "")]
""
@@ -29,6 +29,9 @@
;; Iterator for HI, SI, DI, some instructions can only work on these modes.
(define_mode_iterator GPI_I16 [(HI "AARCH64_ISA_F16") SI DI])
+;; "Iterator" for just TI -- features like @pattern only work with iterators.
+(define_mode_iterator JUST_TI [TI])
+
;; Iterator for QI and HI modes
(define_mode_iterator SHORT [QI HI])
@@ -18,15 +18,19 @@
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
-# CAS, Swap, Load-and-operate have 4 sizes and 4 memory models
-S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), cas swp ldadd ldclr ldeor ldset))
+# Compare-and-swap has 5 sizes and 4 memory models.
+S0 := $(foreach s, 1 2 4 8 16, $(addsuffix _$(s), cas))
+O0 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S0)))
+
+# Swap, Load-and-operate have 4 sizes and 4 memory models
+S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor ldset))
O1 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S1)))
# Store-and-operate has 4 sizes but only 2 memory models (relaxed, release).
S2 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), stadd stclr steor stset))
O2 := $(foreach m, 1 3, $(addsuffix _$(m)$(objext), $(S2)))
-LSE_OBJS := $(O1) $(O2)
+LSE_OBJS := $(O0) $(O1) $(O2)
libgcc-objects += $(LSE_OBJS) have_atomic$(objext)
From: Richard Henderson <richard.henderson@linaro.org> This pattern will only be used with the __sync functions, because we do not yet have a bare TImode atomic load. * config/aarch64/aarch64.c (aarch64_gen_compare_reg): Add support for NE comparison of TImode values. (aarch64_print_operand): Extend %R to handle general registers. (aarch64_emit_load_exclusive): Add support for TImode. (aarch64_emit_store_exclusive): Likewise. (aarch64_atomic_ool_func): Likewise. (aarch64_ool_cas_names): Likewise. * config/aarch64/atomics.md (@atomic_compare_and_swap<ALLI_TI>): Change iterator from ALLI to ALLI_TI. (@aarch64_compare_and_swap<JUST_TI>): New. (@aarch64_compare_and_swap<JUST_TI>_lse): New. (aarch64_load_exclusive_pair): New. (aarch64_store_exclusive_pair): New. * config/aarch64/iterators.md (JUST_TI): New. * config/aarch64/lse.c (cas): Add support for SIZE == 16. * config/aarch64/t-lse (S0, O0): Split out cas. (LSE_OBJS): Include $(O0). --- gcc/config/aarch64/aarch64-protos.h | 2 +- gcc/config/aarch64/aarch64.c | 72 ++++++++++++++++++----- libgcc/config/aarch64/lse.c | 48 ++++++++++----- gcc/config/aarch64/atomics.md | 91 +++++++++++++++++++++++++++-- gcc/config/aarch64/iterators.md | 3 + libgcc/config/aarch64/t-lse | 10 +++- 6 files changed, 189 insertions(+), 37 deletions(-)