@@ -144,6 +144,11 @@ extern const char *vfp_output_fstmd (rtx *);
extern void arm_set_return_address (rtx, rtx);
extern int arm_eliminable_register (rtx);
extern const char *arm_output_shift(rtx *, int);
+extern void arm_expand_sync (enum machine_mode, struct arm_sync_generator *,
+ rtx, rtx, rtx, rtx);
+extern const char *arm_output_memory_barrier (rtx *);
+extern const char *arm_output_sync_insn (rtx, rtx *);
+extern unsigned int arm_sync_loop_insns (rtx , rtx *);
extern bool arm_output_addr_const_extra (FILE *, rtx);
@@ -593,6 +593,7 @@ static int thumb_call_reg_needed;
#define FL_NEON (1 << 20) /* Neon instructions. */
#define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
architecture. */
+#define FL_ARCH7 (1 << 22) /* Architecture 7. */
#define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
@@ -617,7 +618,7 @@ static int thumb_call_reg_needed;
#define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
-#define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
+#define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
@@ -655,6 +656,9 @@ int arm_arch6 = 0;
/* Nonzero if this chip supports the ARM 6K extensions. */
int arm_arch6k = 0;
+/* Nonzero if this chip supports the ARM 7 extensions. */
+int arm_arch7 = 0;
+
/* Nonzero if instructions not present in the 'M' profile can be used. */
int arm_arch_notm = 0;
@@ -1589,6 +1593,7 @@ arm_override_options (void)
arm_arch6 = (insn_flags & FL_ARCH6) != 0;
arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
arm_arch_notm = (insn_flags & FL_NOTM) != 0;
+ arm_arch7 = (insn_flags & FL_ARCH7) != 0;
arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
@@ -16249,6 +16254,17 @@ arm_print_operand (FILE *stream, rtx x, int code)
}
return;
+ case 'C':
+ {
+ rtx addr;
+
+ gcc_assert (GET_CODE (x) == MEM);
+ addr = XEXP (x, 0);
+ gcc_assert (GET_CODE (addr) == REG);
+ asm_fprintf (stream, "[%r]", REGNO (addr));
+ }
+ return;
+
/* Translate an S register number into a D register number and element index. */
case 'y':
{
@@ -22577,4 +22593,365 @@ arm_have_conditional_execution (void)
return !TARGET_THUMB1;
}
+/* Rewrite MEMORY into the only addressing form the ldrex / strex
+   based sync primitives currently handle: a plain register indirect
+   reference with no offset.  The address of MEMORY is forced into a
+   register and a fresh MEM of the same mode is built on top of it.
+   The new MEM is placed in the memory barrier alias set and inherits
+   the volatility of MEMORY.  Offset addressing, which some ARM
+   targets support for these instructions, is not used yet.  */
+static rtx
+arm_legitimize_sync_memory (rtx memory)
+{
+  rtx base = force_reg (Pmode, XEXP (memory, 0));
+  rtx sync_mem = gen_rtx_MEM (GET_MODE (memory), base);
+
+  set_mem_alias_set (sync_mem, ALIAS_SET_MEMORY_BARRIER);
+  MEM_VOLATILE_P (sync_mem) = MEM_VOLATILE_P (memory);
+  return sync_mem;
+}
+
+/* An instruction emitter.  The first argument is nonzero when the
+   text being emitted is a label rather than an instruction, the
+   second argument is the output template and the third is the
+   operand array that the template refers to.  */
+typedef void (* emit_f) (int label, const char *, rtx *);
+
+/* An instruction emitter that emits via the conventional
+   output_asm_insn.  LABEL is ignored: PATTERN is handed to
+   output_asm_insn together with OPERANDS whether it describes a
+   label or an instruction.  */
+static void
+arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
+{
+ output_asm_insn (pattern, operands);
+}
+
+/* Count the number of emitted synchronization instructions. */
+static unsigned arm_insn_count;
+
+/* A counting emitter.  Nothing is written to the instruction stream;
+   instead arm_insn_count is bumped once for every call that describes
+   an instruction.  Calls that describe a label (LABEL nonzero) are
+   not counted.  */
+static void
+arm_count (int label,
+           const char *pattern ATTRIBUTE_UNUSED,
+           rtx *operands ATTRIBUTE_UNUSED)
+{
+  if (label)
+    return;
+
+  arm_insn_count++;
+}
+
+/* Construct an output template on the fly and feed it to an emitter.
+
+   PATTERN is a printf-style format string; it is expanded with the
+   trailing variadic arguments into a local buffer and the resulting
+   text is passed to EMIT together with LABEL and OPERANDS.  Because
+   the expanded text is itself an output template, any '%' directive
+   meant for the emitter (operand references, "%=" and so on) must be
+   written as "%%" in PATTERN.
+
+   Note the hard limit on the pattern buffer size.  The expansion is
+   bounded by that limit, and an expansion that fails or does not fit
+   is treated as an internal error instead of overrunning the
+   buffer.  */
+static void
+arm_output_asm_insn (emit_f emit, int label, rtx *operands,
+                     const char *pattern, ...)
+{
+  va_list ap;
+  char buffer[256];
+  int length;
+
+  va_start (ap, pattern);
+  length = vsnprintf (buffer, sizeof (buffer), pattern, ap);
+  va_end (ap);
+
+  /* A negative result is a formatting error; a result at or beyond
+     the buffer size means the template would have been truncated.  */
+  gcc_assert (length >= 0 && (size_t) length < sizeof (buffer));
+  emit (label, buffer, operands);
+}
+
+/* Hand the memory barrier instruction provided by this target, if
+   any, to the emitter EMIT along with OPERANDS.  Nothing is emitted
+   when the target has neither DMB nor the CP15 barrier operation.  */
+static void
+arm_process_output_memory_barrier (emit_f emit, rtx *operands)
+{
+  if (TARGET_HAVE_DMB)
+    {
+      /* Note that this is a system level barrier.  An inner
+         shareability zone barrier, i.e. "DMB ISH", should be
+         considered here instead.  */
+      emit (0, "dmb\tsy", operands);
+      return;
+    }
+
+  if (TARGET_HAVE_DMB_MCR)
+    emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
+}
+
+/* Emit the memory barrier instruction, if any, provided by this
+   target.  The barrier is written directly through arm_emit, so the
+   string returned is always empty.
+   NOTE(review): presumably the empty string is returned so that this
+   can serve as the output statement of an insn pattern -- confirm
+   against the callers in the machine description.  */
+const char *
+arm_output_memory_barrier (rtx *operands)
+{
+ arm_process_output_memory_barrier (arm_emit, operands);
+ return "";
+}
+
+/* Return the size suffix that ldrex/strex need when operating on an
+   object of mode MODE: "b" for QImode, "h" for HImode, "d" for DImode
+   and the empty string for SImode.  Any other mode is an internal
+   error.  */
+static const char *
+arm_ldrex_suffix (enum machine_mode mode)
+{
+  const char *suffix = "";
+
+  switch (mode)
+    {
+    case QImode:
+      suffix = "b";
+      break;
+
+    case HImode:
+      suffix = "h";
+      break;
+
+    case SImode:
+      break;
+
+    case DImode:
+      suffix = "d";
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  return suffix;
+}
+
+/* Pass to EMIT the ldrex{b,h,d, } instruction matching MODE that
+   loads TARGET from MEMORY.  MEMORY is printed with the 'C' operand
+   modifier.  */
+static void
+arm_output_ldrex (emit_f emit,
+                  enum machine_mode mode,
+                  rtx target,
+                  rtx memory)
+{
+  rtx ops[2];
+
+  ops[0] = target;
+  ops[1] = memory;
+  arm_output_asm_insn (emit, 0, ops, "ldrex%s\t%%0, %%C1",
+                       arm_ldrex_suffix (mode));
+}
+
+/* Pass to EMIT the strex{b,h,d, } instruction matching MODE that
+   stores VALUE to MEMORY and leaves the store status in RESULT.  CC
+   is appended to the mnemonic after the size suffix.  MEMORY is
+   printed with the 'C' operand modifier.  */
+static void
+arm_output_strex (emit_f emit,
+                  enum machine_mode mode,
+                  const char *cc,
+                  rtx result,
+                  rtx value,
+                  rtx memory)
+{
+  rtx ops[3];
+
+  ops[0] = result;
+  ops[1] = value;
+  ops[2] = memory;
+  arm_output_asm_insn (emit, 0, ops, "strex%s%s\t%%0, %%1, %%C2",
+                       arm_ldrex_suffix (mode), cc);
+}
+
+/* Pass to EMIT the two operand instruction "MNEMONIC D, S".  */
+static void
+arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
+{
+  rtx ops[2];
+
+  ops[0] = d;
+  ops[1] = s;
+  arm_output_asm_insn (emit, 0, ops, "%s\t%%0, %%1", mnemonic);
+}
+
+/* Pass to EMIT the three operand instruction "MNEMONIC D, A, B".  */
+static void
+arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
+{
+  rtx ops[3];
+
+  ops[0] = d;
+  ops[1] = a;
+  ops[2] = b;
+  arm_output_asm_insn (emit, 0, ops, "%s\t%%0, %%1, %%2", mnemonic);
+}
+
+/* Emit a load store exclusive synchronization loop.
+
+   do
+     old_value = [mem]
+     if old_value != required_value
+       break;
+     t1 = sync_op (old_value, new_value)
+     [mem] = t1, t2 = store status
+   while t2 != 0
+
+   Note:
+     t1 == t2 is not permitted
+     t1 == old_value is permitted
+
+   emit:
+
+     The emitter every label and instruction is passed to; labels are
+     passed with a nonzero label argument.
+
+   required_value:
+
+     RTX register or const_int representing the required old_value for
+     the modify to continue, if NULL no comparison is performed.
+
+   sync_op:
+
+     The operation combining old_value and new_value into t1.  For
+     SYNC_OP_NONE no instruction is emitted and new_value itself is
+     stored.
+
+   early_barrier_required:
+
+     If nonzero a memory barrier is emitted ahead of the loop.  A
+     barrier is always emitted after the loop.  */
+static void
+arm_output_sync_loop (emit_f emit,
+ enum machine_mode mode,
+ rtx old_value,
+ rtx memory,
+ rtx required_value,
+ rtx new_value,
+ rtx t1,
+ rtx t2,
+ enum attr_sync_op sync_op,
+ int early_barrier_required)
+{
+ rtx operands[1];
+
+ gcc_assert (t1 != t2);
+
+ if (early_barrier_required)
+ arm_process_output_memory_barrier (emit, NULL);
+
+ /* Loop head.  The template uses no numbered operand, so the operand
+ array is passed without having been filled in.  */
+ arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
+
+ arm_output_ldrex (emit, mode, old_value, memory);
+
+ if (required_value)
+ {
+ /* This array shadows the outer one for the compare and branch.  */
+ rtx operands[2];
+
+ operands[0] = old_value;
+ operands[1] = required_value;
+ arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
+ /* On a mismatch leave the loop.  The exit label is placed after the
+ trailing barrier, so this path does not execute that barrier.  */
+ arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
+ }
+
+ switch (sync_op)
+ {
+ case SYNC_OP_ADD:
+ arm_output_op3 (emit, "add", t1, old_value, new_value);
+ break;
+
+ case SYNC_OP_SUB:
+ arm_output_op3 (emit, "sub", t1, old_value, new_value);
+ break;
+
+ case SYNC_OP_IOR:
+ arm_output_op3 (emit, "orr", t1, old_value, new_value);
+ break;
+
+ case SYNC_OP_XOR:
+ arm_output_op3 (emit, "eor", t1, old_value, new_value);
+ break;
+
+ case SYNC_OP_AND:
+ arm_output_op3 (emit,"and", t1, old_value, new_value);
+ break;
+
+ case SYNC_OP_NAND:
+ /* Computed as an AND followed by a bitwise NOT of the result.  */
+ arm_output_op3 (emit, "and", t1, old_value, new_value);
+ arm_output_op2 (emit, "mvn", t1, t1);
+ break;
+
+ case SYNC_OP_NONE:
+ /* Nothing to compute: store new_value directly.  */
+ t1 = new_value;
+ break;
+ }
+
+ /* Attempt the store; t2 receives the status.  Branch back to the
+ loop head while the status is nonzero.  */
+ arm_output_strex (emit, mode, "", t2, t1, memory);
+ operands[0] = t2;
+ arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
+ arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", LOCAL_LABEL_PREFIX);
+
+ arm_process_output_memory_barrier (emit, NULL);
+ arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
+}
+
+/* Select an operand by one-based index.  When INDEX is greater than
+   zero the result is OPERANDS[INDEX - 1]; otherwise DEFAULT_VALUE is
+   returned unchanged.  */
+static rtx
+arm_get_sync_operand (rtx *operands, int index, rtx default_value)
+{
+  return index > 0 ? operands[index - 1] : default_value;
+}
+
+/* Fetch the operand selected by the sync_NAME attribute of the
+   current instruction, or DEFAULT when the attribute value, taken as
+   an integer, is not positive.  Relies on `insn' and `operands' being
+   in scope at the point of use.  The expansion is a plain expression
+   with no trailing semicolon; the caller supplies its own.  */
+#define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
+  arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), \
+                        (DEFAULT))
+
+/* Work out the operands of the synchronization instruction INSN from
+   its sync_* attributes and pass the resulting synchronization loop
+   to EMIT.  Roles whose attribute selects no operand are passed on
+   as 0.  The loop operates in the mode of the memory operand.  */
+static void
+arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
+{
+  rtx result, memory, required_value, new_value, t1, t2;
+  int release_barrier;
+
+  result = FETCH_SYNC_OPERAND (result, 0);
+  memory = FETCH_SYNC_OPERAND (memory, 0);
+  required_value = FETCH_SYNC_OPERAND (required_value, 0);
+  new_value = FETCH_SYNC_OPERAND (new_value, 0);
+  t1 = FETCH_SYNC_OPERAND (t1, 0);
+  t2 = FETCH_SYNC_OPERAND (t2, 0);
+
+  release_barrier
+    = get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
+
+  arm_output_sync_loop (emit, GET_MODE (memory), result, memory,
+                        required_value, new_value, t1, t2,
+                        get_attr_sync_op (insn), release_barrier);
+}
+
+/* Emit a synchronization instruction loop for INSN with OPERANDS.
+   The loop is written directly through arm_emit, so the string
+   returned is always empty.  */
+const char *
+arm_output_sync_insn (rtx insn, rtx *operands)
+{
+ arm_process_output_sync_insn (arm_emit, insn, operands);
+ return "";
+}
+
+/* Count the number of machine instructions that will be emitted for
+   a synchronization instruction.  Note that the emitter used does not
+   emit instructions, it just counts instructions, being careful not
+   to count labels.  The count is accumulated in the file-scope
+   counter arm_insn_count, which is reset on entry.  */
+unsigned int
+arm_sync_loop_insns (rtx insn, rtx *operands)
+{
+ arm_insn_count = 0;
+ arm_process_output_sync_insn (arm_count, insn, operands);
+ return arm_insn_count;
+}
+
+/* Invoke the target sync instruction generator wrapped by GENERATOR,
+   passing the operand list its tag calls for.  An "omn" generator
+   takes OLD_VALUE, MEMORY and NEW_VALUE and must not be given a
+   REQUIRED_VALUE; an "omrn" generator additionally takes
+   REQUIRED_VALUE, which must be present.  Returns whatever the
+   generator returns, or NULL for an unrecognised tag.  */
+static rtx
+arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
+                    rtx memory, rtx required_value, rtx new_value)
+{
+  if (generator->op == arm_sync_generator_omn)
+    {
+      gcc_assert (! required_value);
+      return generator->u.omn (old_value, memory, new_value);
+    }
+
+  if (generator->op == arm_sync_generator_omrn)
+    {
+      gcc_assert (required_value);
+      return generator->u.omrn (old_value, memory, required_value,
+                                new_value);
+    }
+
+  return NULL;
+}
+
+/* Expand a synchronization loop.  The loop is expanded as a single
+   opaque block of instructions so that no extraneous memory access
+   can later be inserted inside the critical region; the exclusive
+   access property of ldrex/strex is only guaranteed if there are no
+   intervening memory accesses.
+
+   MODE is the mode of the object operated on and GENERATOR wraps the
+   instruction generator to call.  TARGET receives the result; if it
+   is NULL a fresh register of MODE is used.  MEMORY is first
+   legitimized for ldrex/strex.  REQUIRED_VALUE may be NULL.
+
+   For SImode the generator is called on the operands as given.  For
+   any other mode REQUIRED_VALUE (when present) and NEW_VALUE are
+   converted to SImode as unsigned values, the generator produces its
+   result in an SImode temporary, and the low part of that temporary
+   is moved to TARGET.  */
+void
+arm_expand_sync (enum machine_mode mode,
+                 struct arm_sync_generator *generator,
+                 rtx target, rtx memory, rtx required_value, rtx new_value)
+{
+  rtx wide_result;
+
+  if (target == NULL)
+    target = gen_reg_rtx (mode);
+
+  memory = arm_legitimize_sync_memory (memory);
+
+  if (mode == SImode)
+    {
+      emit_insn (arm_call_generator (generator, target, memory,
+                                     required_value, new_value));
+      return;
+    }
+
+  wide_result = gen_reg_rtx (SImode);
+
+  if (required_value)
+    required_value = convert_modes (SImode, mode, required_value, true);
+
+  new_value = convert_modes (SImode, mode, new_value, true);
+  emit_insn (arm_call_generator (generator, wide_result, memory,
+                                 required_value, new_value));
+  emit_move_insn (target, gen_lowpart (mode, wide_result));
+}
+
#include "gt-arm.h"
@@ -126,6 +126,24 @@ enum target_cpus
/* The processor for which instructions should be scheduled. */
extern enum processor_type arm_tune;
+/* Tag identifying which member of the generator union in struct
+   arm_sync_generator is in use.  */
+enum arm_sync_generator_tag
+ {
+ /* Generator taking three rtx operands (the `omn' member).  */
+ arm_sync_generator_omn,
+ /* Generator taking four rtx operands (the `omrn' member).  */
+ arm_sync_generator_omrn
+ };
+
+/* Wrapper to pass around a polymorphic pointer to a sync instruction
+   generator, together with a tag recording which of the generator
+   signatures the pointer has.  */
+struct arm_sync_generator
+{
+ /* Selects the valid member of U.  */
+ enum arm_sync_generator_tag op;
+ union
+ {
+ /* Valid when OP is arm_sync_generator_omn.  */
+ rtx (* omn) (rtx, rtx, rtx);
+ /* Valid when OP is arm_sync_generator_omrn.  */
+ rtx (* omrn) (rtx, rtx, rtx, rtx);
+ } u;
+};
+
typedef enum arm_cond_code
{
ARM_EQ = 0, ARM_NE, ARM_CS, ARM_CC, ARM_MI, ARM_PL, ARM_VS, ARM_VC,
@@ -270,6 +288,17 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
for Thumb-2. */
#define TARGET_UNIFIED_ASM TARGET_THUMB2
+/* Nonzero if this chip provides the DMB instruction. */
+#define TARGET_HAVE_DMB (arm_arch7)
+
+/* Nonzero if this chip implements a memory barrier via CP15. */
+#define TARGET_HAVE_DMB_MCR (arm_arch6k && ! TARGET_HAVE_DMB)
+
+/* Nonzero if this chip supports ldrex and strex.  */
+#define TARGET_HAVE_LDREX ((arm_arch6 && TARGET_ARM) || arm_arch7)
+
+/* Nonzero if this chip supports ldrex{bhd} and strex{bhd}. */
+#define TARGET_HAVE_LDREXBHD ((arm_arch6k && TARGET_ARM) || arm_arch7)
/* True iff the full BPABI is being used. If TARGET_BPABI is true,
then TARGET_AAPCS_BASED must be true -- but the converse does not
@@ -403,6 +432,12 @@ extern int arm_arch5e;
/* Nonzero if this chip supports the ARM Architecture 6 extensions. */
extern int arm_arch6;
+/* Nonzero if this chip supports the ARM Architecture 6k extensions. */
+extern int arm_arch6k;
+
+/* Nonzero if this chip supports the ARM Architecture 7 extensions. */
+extern int arm_arch7;
+
/* Nonzero if instructions not present in the 'M' profile can be used. */
extern int arm_arch_notm;
@@ -103,6 +103,7 @@
(UNSPEC_RBIT 26) ; rbit operation.
(UNSPEC_SYMBOL_OFFSET 27) ; The offset of the start of the symbol from
; another symbolic address.
+ (UNSPEC_MEMORY_BARRIER 28) ; Represent a memory barrier.
]
)
@@ -137,6 +138,11 @@
(VUNSPEC_WCMP_GT 14) ; Used by the iwMMXT WCMPGT instructions
(VUNSPEC_EH_RETURN 20); Use to override the return address for exception
; handling.
+ (VUNSPEC_SYNC_COMPARE_AND_SWAP 21) ; Represent an atomic compare swap.
+ (VUNSPEC_SYNC_LOCK 22) ; Represent a sync_lock_test_and_set.
+ (VUNSPEC_SYNC_OP 23) ; Represent a sync_<op>
+ (VUNSPEC_SYNC_NEW_OP 24) ; Represent a sync_new_<op>
+ (VUNSPEC_SYNC_OLD_OP 25) ; Represent a sync_old_<op>
]
)
@@ -164,8 +170,21 @@
(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp"
(const (symbol_ref "arm_fpu_attr")))
+;; Attributes describing a synchronization instruction.  The first six
+;; each hold either "none" or an operand number, and name the operand
+;; that plays the given role (result, memory reference, required old
+;; value, new value and the two temporaries) in the sync loop.
+(define_attr "sync_result" "none,0,1,2,3,4,5" (const_string "none"))
+(define_attr "sync_memory" "none,0,1,2,3,4,5" (const_string "none"))
+(define_attr "sync_required_value" "none,0,1,2,3,4,5" (const_string "none"))
+(define_attr "sync_new_value" "none,0,1,2,3,4,5" (const_string "none"))
+(define_attr "sync_t1" "none,0,1,2,3,4,5" (const_string "none"))
+(define_attr "sync_t2" "none,0,1,2,3,4,5" (const_string "none"))
+;; Whether a memory barrier is required ahead of the sync loop.
+(define_attr "sync_release_barrier" "yes,no" (const_string "yes"))
+;; The operation the sync loop applies to the old and new values.
+(define_attr "sync_op" "none,add,sub,ior,xor,and,nand"
+ (const_string "none"))
+
; LENGTH of an instruction (in bytes)
-(define_attr "length" "" (const_int 4))
+(define_attr "length" ""
+ (cond [(not (eq_attr "sync_memory" "none"))
+ (symbol_ref "arm_sync_loop_insns (insn, operands) * 4")
+ ] (const_int 4)))
; POOL_RANGE is how far away from a constant pool entry that this insn
; can be placed. If the distance is zero, then this insn will never
@@ -10614,4 +10633,5 @@
(include "thumb2.md")
;; Neon patterns
(include "neon.md")
-
+;; Synchronization Primitives
+(include "sync.md")