diff mbox

[ARM] Inline expansion of __sync_* builtins.

Message ID 000f01cb3868$8f84cb30$ae8e6190$@shawcroft@arm.com
State New
Headers show

Commit Message

Marcus Shawcroft Aug. 10, 2010, 8:46 a.m. UTC
This patch inlines builtin synchronization primitives for ARM targets that
provide load/store exclusive.

All __sync_* builtins applied to a 32bit object will be inlined on ARM
targets which provide the ldrex/strex instructions, primitives applied to 8
or 16 bit objects will be inlined on targets which provide
ldrex{b,h}/strex{b,h} in all other cases we fall back to the existing
library functions. The __sync_synchronize() builtin will emit an inline DMB
or MCR instruction on targets where this is appropriate.

The ldrex/strex synchronization loops are represented as opaque instruction
blobs. This provides a guarantee that no memory reference can be inserted
between the ldrex / strex pair.

Memory references in the ldrex*/strex* instructions are always indirect, we
do not (yet) exploit offset addressing on targets where this is available.

2010-08-10  Marcus Shawcroft  <marcus.shawcroft@arm.com>

        * config/arm/arm-protos.h (arm_expand_sync): New.
        (arm_output_memory_barrier, arm_output_sync_insn): New.
        (arm_sync_loop_insns): New.
        * config/arm/arm.c (FL_ARCH7): New.
        (FL_FOR_ARCH7): Include FL_ARCH7.
        (arm_arch7): New.
        (arm_print_operand): Support %C markup.
        (arm_legitimize_sync_memory): New.
        (arm_emit, arm_insn_count, arm_count, arm_output_asm_insn): New.
        (arm_process_output_memory_barrier, arm_output_memory_barrier): New.
        (arm_ldrex_suffix, arm_output_ldrex, arm_output_strex): New.
        (arm_output_op2, arm_output_op3, arm_output_sync_loop): New.
        (arm_get_sync_operand, FETCH_SYNC_OPERAND): New.
        (arm_process_output_sync_insn, arm_output_sync_insn): New.
        (arm_sync_loop_insns,arm_call_generator, arm_expand_sync): New.
        * config/arm/arm.h (struct arm_sync_generator): New.
        (TARGET_HAVE_DMB, TARGET_HAVE_DMB_MCR): New.
        (TARGET_HAVE_LDREX, TARGET_HAVE_LDREXBHD): New.
        * config/arm/arm.md: Include sync.md.
        (UNSPEC_MEMORY_BARRIER): New.
        (VUNSPEC_SYNC_COMPARE_AND_SWAP, VUNSPEC_SYNC_LOCK, VUNSPEC_SYNC_OP):
New.
        (VUNSPEC_SYNC_NEW_OP, VUNSPEC_SYNC_OLD_OP): New.
        (sync_result, sync_memory, sync_required_value): New attributes.
        (sync_new_value, sync_t1, sync_t2): Likewise.
        (sync_release_barrier, sync_op): Likewise.
        (length): Add logic to length attribute defintion to call
        arm_sync_loop_insns when appropriate.
        * config/arm/sync.md: New file.

--
Marcus Shawcroft
ARM

Comments

Richard Earnshaw Aug. 10, 2010, 2:07 p.m. UTC | #1
On Tue, 2010-08-10 at 09:46 +0100, Marcus Shawcroft wrote:
> This patch inlines builtin synchronization primitives for ARM targets that
> provide load/store exclusive.
> 
> All __sync_* builtins applied to a 32bit object will be inlined on ARM
> targets which provide the ldrex/strex instructions, primitives applied to 8
> or 16 bit objects will be inlined on targets which provide
> ldrex{b,h}/strex{b,h} in all other cases we fall back to the existing
> library functions. The __sync_synchronize() builtin will emit an inline DMB
> or MCR instruction on targets where this is appropriate.
>
> The ldrex/strex synchronization loops are represented as opaque instruction
> blobs. This provides a guarantee that no memory reference can be inserted
> between the ldrex / strex pair.
> 
> Memory references in the ldrex*/strex* instructions are always indirect, we
> do not (yet) exploit offset addressing on targets where this is available.
> 
> 2010-08-10  Marcus Shawcroft  <marcus.shawcroft@arm.com>
> 
>         * config/arm/arm-protos.h (arm_expand_sync): New.
>         (arm_output_memory_barrier, arm_output_sync_insn): New.
>         (arm_sync_loop_insns): New.
>         * config/arm/arm.c (FL_ARCH7): New.
>         (FL_FOR_ARCH7): Include FL_ARCH7.
>         (arm_arch7): New.
>         (arm_print_operand): Support %C markup.
>         (arm_legitimize_sync_memory): New.
>         (arm_emit, arm_insn_count, arm_count, arm_output_asm_insn): New.
>         (arm_process_output_memory_barrier, arm_output_memory_barrier): New.
>         (arm_ldrex_suffix, arm_output_ldrex, arm_output_strex): New.
>         (arm_output_op2, arm_output_op3, arm_output_sync_loop): New.
>         (arm_get_sync_operand, FETCH_SYNC_OPERAND): New.
>         (arm_process_output_sync_insn, arm_output_sync_insn): New.
>         (arm_sync_loop_insns,arm_call_generator, arm_expand_sync): New.
>         * config/arm/arm.h (struct arm_sync_generator): New.
>         (TARGET_HAVE_DMB, TARGET_HAVE_DMB_MCR): New.
>         (TARGET_HAVE_LDREX, TARGET_HAVE_LDREXBHD): New.
>         * config/arm/arm.md: Include sync.md.
>         (UNSPEC_MEMORY_BARRIER): New.
>         (VUNSPEC_SYNC_COMPARE_AND_SWAP, VUNSPEC_SYNC_LOCK, VUNSPEC_SYNC_OP):
> New.
>         (VUNSPEC_SYNC_NEW_OP, VUNSPEC_SYNC_OLD_OP): New.
>         (sync_result, sync_memory, sync_required_value): New attributes.
>         (sync_new_value, sync_t1, sync_t2): Likewise.
>         (sync_release_barrier, sync_op): Likewise.
>         (length): Add logic to length attribute defintion to call
>         arm_sync_loop_insns when appropriate.
>         * config/arm/sync.md: New file.
> 

+/* Emit the memory barrier instruction, if any, provided by this
+   target to a specified emitter.  */
+static void
+arm_process_output_memory_barrier (emit_f emit, rtx *operands)
+{
+  if (TARGET_HAVE_DMB)
+    {
+      /* Note we issue a system level barrier. We should consider
+         issuing a inner shareabilty zone barrier here instead, ie.
+         "DMB ISH".  */
+      emit (0, "dmb\tsy", operands);
+    }
+  else if (TARGET_HAVE_DMB_MCR)
+    emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
+}

I'm uncomfortable with the fact that this can fall through and not emit
a barrier in some circumstances.  Shouldn't it fall back to a library
call?  Otherwise code built for a uni-processor may fail to run on a
later multi-processor core.

Apart from this, I can't see any other problems.

R.
David Daney Aug. 10, 2010, 4:07 p.m. UTC | #2
On 08/10/2010 01:46 AM, Marcus Shawcroft wrote:
[...]
>          * config/arm/sync.md: New file.

I didn't see that file in the patch.  Did I miss it?

David Daney
diff mbox

Patch

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 61701a8..cd29ef1 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -144,6 +144,11 @@  extern const char *vfp_output_fstmd (rtx *);
 extern void arm_set_return_address (rtx, rtx);
 extern int arm_eliminable_register (rtx);
 extern const char *arm_output_shift(rtx *, int);
+extern void arm_expand_sync (enum machine_mode, struct arm_sync_generator *,
+ 			     rtx, rtx, rtx, rtx);
+extern const char *arm_output_memory_barrier (rtx *);
+extern const char *arm_output_sync_insn (rtx, rtx *);
+extern unsigned int arm_sync_loop_insns (rtx , rtx *);
 
 extern bool arm_output_addr_const_extra (FILE *, rtx);
 
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index a406c29..c05d54b 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -593,6 +593,7 @@  static int thumb_call_reg_needed;
 #define FL_NEON       (1 << 20)       /* Neon instructions.  */
 #define FL_ARCH7EM    (1 << 21)	      /* Instructions present in the ARMv7E-M
 					 architecture.  */
+#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */
 
 #define FL_IWMMXT     (1 << 29)	      /* XScale v2 or "Intel Wireless MMX technology".  */
 
@@ -617,7 +618,7 @@  static int thumb_call_reg_needed;
 #define FL_FOR_ARCH6ZK	FL_FOR_ARCH6K
 #define FL_FOR_ARCH6T2	(FL_FOR_ARCH6 | FL_THUMB2)
 #define FL_FOR_ARCH6M	(FL_FOR_ARCH6 & ~FL_NOTM)
-#define FL_FOR_ARCH7	(FL_FOR_ARCH6T2 &~ FL_NOTM)
+#define FL_FOR_ARCH7	((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
 #define FL_FOR_ARCH7A	(FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
 #define FL_FOR_ARCH7R	(FL_FOR_ARCH7A | FL_DIV)
 #define FL_FOR_ARCH7M	(FL_FOR_ARCH7 | FL_DIV)
@@ -655,6 +656,9 @@  int arm_arch6 = 0;
 /* Nonzero if this chip supports the ARM 6K extensions.  */
 int arm_arch6k = 0;
 
+/* Nonzero if this chip supports the ARM 7 extensions.  */
+int arm_arch7 = 0;
+
 /* Nonzero if instructions not present in the 'M' profile can be used.  */
 int arm_arch_notm = 0;
 
@@ -1589,6 +1593,7 @@  arm_override_options (void)
   arm_arch6 = (insn_flags & FL_ARCH6) != 0;
   arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
   arm_arch_notm = (insn_flags & FL_NOTM) != 0;
+  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
   arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
   arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
   arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
@@ -16249,6 +16254,17 @@  arm_print_operand (FILE *stream, rtx x, int code)
       }
       return;
 
+    case 'C':
+      {
+	rtx addr;
+
+	gcc_assert (GET_CODE (x) == MEM);
+	addr = XEXP (x, 0);
+	gcc_assert (GET_CODE (addr) == REG);
+	asm_fprintf (stream, "[%r]", REGNO (addr));
+      }
+      return;
+
     /* Translate an S register number into a D register number and element index.  */
     case 'y':
       {
@@ -22577,4 +22593,365 @@  arm_have_conditional_execution (void)
   return !TARGET_THUMB1;
 }
 
+/* Legitimize a memory reference for sync primitive implemented using
+   ldrex / strex.  We currently force the form of the reference to be
+   indirect without offset.  We do not yet support the indirect offset
+   addressing supported by some ARM targets for these
+   instructions.  */
+static rtx
+arm_legitimize_sync_memory (rtx memory)
+{
+  rtx addr = force_reg (Pmode, XEXP (memory, 0));
+  rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
+
+  set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
+  MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
+  return legitimate_memory;
+}
+
+/* An instruction emitter. */
+typedef void (* emit_f) (int label, const char *, rtx *);
+
+/* An instruction emitter that emits via the conventional
+   output_asm_insn.  */
+static void
+arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
+{
+  output_asm_insn (pattern, operands);
+}
+
+/* Count the number of emitted synchronization instructions.  */
+static unsigned arm_insn_count;
+
+/* An emitter that counts emitted instructions but does not actually
+   emit instruction into the the instruction stream.  */
+static void
+arm_count (int label,
+	   const char *pattern ATTRIBUTE_UNUSED,
+	   rtx *operands ATTRIBUTE_UNUSED)
+{
+  if (! label)
+    ++ arm_insn_count;
+}
+
+/* Construct a pattern using conventional output formatting and feed
+   it to output_asm_insn.  Provides a mechanism to construct the
+   output pattern on the fly.  Note the hard limit on the pattern
+   buffer size.  */
+static void
+arm_output_asm_insn (emit_f emit, int label, rtx *operands,
+		     const char *pattern, ...)
+{
+  va_list ap;
+  char buffer[256];
+
+  va_start (ap, pattern);
+  vsprintf (buffer, pattern, ap);
+  va_end (ap);
+  emit (label, buffer, operands);
+}
+
+/* Emit the memory barrier instruction, if any, provided by this
+   target to a specified emitter.  */
+static void
+arm_process_output_memory_barrier (emit_f emit, rtx *operands)
+{
+  if (TARGET_HAVE_DMB)
+    {
+      /* Note we issue a system level barrier. We should consider
+         issuing a inner shareabilty zone barrier here instead, ie.
+         "DMB ISH".  */
+      emit (0, "dmb\tsy", operands);
+    }
+  else if (TARGET_HAVE_DMB_MCR)
+    emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
+}
+
+/* Emit the memory barrier instruction, if any, provided by this
+   target.  */
+const char *
+arm_output_memory_barrier (rtx *operands)
+{
+  arm_process_output_memory_barrier (arm_emit, operands);
+  return "";
+}
+
+/* Helper to figure out the instruction suffix required on ldrex/strex
+   for operations on an object of the specified mode.  */
+static const char *
+arm_ldrex_suffix (enum machine_mode mode)
+{
+  switch (mode)
+    {
+    case QImode: return "b";
+    case HImode: return "h";
+    case SImode: return "";
+    case DImode: return "d";
+    default:
+      gcc_unreachable ();
+    }
+  return "";
+}
+
+/* Emit an ldrex{b,h,d, } instruction appropriate for the specified
+   mode.  */
+static void
+arm_output_ldrex (emit_f emit,
+		  enum machine_mode mode,
+		  rtx target,
+		  rtx memory)
+{
+  const char *suffix = arm_ldrex_suffix (mode);
+  rtx operands[2];
+
+  operands[0] = target;
+  operands[1] = memory;
+  arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
+}
+
+/* Emit a strex{b,h,d, } instruction appropriate for the specified
+   mode.  */
+static void
+arm_output_strex (emit_f emit,
+		  enum machine_mode mode,
+		  const char *cc,
+		  rtx result,
+		  rtx value,
+		  rtx memory)
+{
+  const char *suffix = arm_ldrex_suffix (mode);
+  rtx operands[3];
+
+  operands[0] = result;
+  operands[1] = value;
+  operands[2] = memory;
+  arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
+		       cc);
+}
+
+/* Helper to emit a two operand instruction.  */
+static void
+arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
+{
+  rtx operands[2];
+
+  operands[0] = d;
+  operands[1] = s;
+  arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
+}
+
+/* Helper to emit a three operand instruction.  */
+static void
+arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
+{
+  rtx operands[3];
+
+  operands[0] = d;
+  operands[1] = a;
+  operands[2] = b;
+  arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
+}
+
+/* Emit a load store exclusive synchronization loop.
+
+   do
+     old_value = [mem]
+     if old_value != required_value
+       break;
+     t1 = sync_op (old_value, new_value)
+     [mem] = t1, t2 = [0|1]
+   while ! t2
+
+   Note:
+     t1 == t2 is not permitted
+     t1 == old_value is permitted
+
+   required_value:
+
+   RTX register or const_int representing the required old_value for
+   the modify to continue, if NULL no comparsion is performed.  */
+static void
+arm_output_sync_loop (emit_f emit,
+		      enum machine_mode mode,
+		      rtx old_value,
+		      rtx memory,
+		      rtx required_value,
+		      rtx new_value,
+		      rtx t1,
+		      rtx t2,
+		      enum attr_sync_op sync_op,
+		      int early_barrier_required)
+{
+  rtx operands[1];
+
+  gcc_assert (t1 != t2);
+
+  if (early_barrier_required)
+    arm_process_output_memory_barrier (emit, NULL);
+
+  arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
+
+  arm_output_ldrex (emit, mode, old_value, memory);
+
+  if (required_value)
+    {
+      rtx operands[2];
+
+      operands[0] = old_value;
+      operands[1] = required_value;
+      arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
+      arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
+    }
+
+  switch (sync_op)
+    {
+    case SYNC_OP_ADD:
+      arm_output_op3 (emit, "add", t1, old_value, new_value);
+      break;
+
+    case SYNC_OP_SUB:
+      arm_output_op3 (emit, "sub", t1, old_value, new_value);
+      break;
+
+    case SYNC_OP_IOR:
+      arm_output_op3 (emit, "orr", t1, old_value, new_value);
+      break;
+
+    case SYNC_OP_XOR:
+      arm_output_op3 (emit, "eor", t1, old_value, new_value);
+      break;
+
+    case SYNC_OP_AND:
+      arm_output_op3 (emit,"and", t1, old_value, new_value);
+      break;
+
+    case SYNC_OP_NAND:
+      arm_output_op3 (emit, "and", t1, old_value, new_value);
+      arm_output_op2 (emit, "mvn", t1, t1);
+      break;
+
+    case SYNC_OP_NONE:
+      t1 = new_value;
+      break;
+    }
+
+  arm_output_strex (emit, mode, "", t2, t1, memory);
+  operands[0] = t2;
+  arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
+  arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", LOCAL_LABEL_PREFIX);
+
+  arm_process_output_memory_barrier (emit, NULL);
+  arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
+}
+
+static rtx
+arm_get_sync_operand (rtx *operands, int index, rtx default_value)
+{
+  if (index > 0)
+    default_value = operands[index - 1];
+
+  return default_value;
+}
+
+#define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
+  arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
+
+/* Extract the operands for a synchroniztion instruction from the
+   instructions attributes and emit the instruction.  */
+static void
+arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
+{
+  rtx result, memory, required_value, new_value, t1, t2;
+  int early_barrier;
+  enum machine_mode mode;
+  enum attr_sync_op sync_op;
+
+  result = FETCH_SYNC_OPERAND(result, 0);
+  memory = FETCH_SYNC_OPERAND(memory, 0);
+  required_value = FETCH_SYNC_OPERAND(required_value, 0);
+  new_value = FETCH_SYNC_OPERAND(new_value, 0);
+  t1 = FETCH_SYNC_OPERAND(t1, 0);
+  t2 = FETCH_SYNC_OPERAND(t2, 0);
+  early_barrier =
+    get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
+  sync_op = get_attr_sync_op (insn);
+  mode = GET_MODE (memory);
+
+  arm_output_sync_loop (emit, mode, result, memory, required_value,
+			new_value, t1, t2, sync_op, early_barrier);
+}
+
+/* Emit a synchronization instruction loop.  */
+const char *
+arm_output_sync_insn (rtx insn, rtx *operands)
+{
+  arm_process_output_sync_insn (arm_emit, insn, operands);
+  return "";
+}
+
+/* Count the number of machine instruction that will be emitted for a
+   synchronization instruction.  Note that the emitter used does not
+   emit instructions, it just counts instructions being carefull not
+   to count labels.  */
+unsigned int
+arm_sync_loop_insns (rtx insn, rtx *operands)
+{
+  arm_insn_count = 0;
+  arm_process_output_sync_insn (arm_count, insn, operands);
+  return arm_insn_count;
+}
+
+/* Helper to call a target sync instruction generator, dealing with
+   the variation in operands required by the different generators.  */
+static rtx
+arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
+  		    rtx memory, rtx required_value, rtx new_value)
+{
+  switch (generator->op)
+    {
+    case arm_sync_generator_omn:
+      gcc_assert (! required_value);
+      return generator->u.omn (old_value, memory, new_value);
+
+    case arm_sync_generator_omrn:
+      gcc_assert (required_value);
+      return generator->u.omrn (old_value, memory, required_value, new_value);
+    }
+
+  return NULL;
+}
+
+/* Expand a synchronization loop. The synchronization loop is expanded
+   as an opaque block of instructions in order to ensure that we do
+   not subsequently get extraneous memory accesses inserted within the
+   critical region. The exclusive access property of ldrex/strex is
+   only guaranteed in there are no intervening memory accesses. */
+void
+arm_expand_sync (enum machine_mode mode,
+		 struct arm_sync_generator *generator,
+		 rtx target, rtx memory, rtx required_value, rtx new_value)
+{
+  if (target == NULL)
+    target = gen_reg_rtx (mode);
+
+  memory = arm_legitimize_sync_memory (memory);
+  if (mode != SImode)
+    {
+      rtx load_temp = gen_reg_rtx (SImode);
+
+      if (required_value)
+	required_value = convert_modes (SImode, mode, required_value, true);
+
+      new_value = convert_modes (SImode, mode, new_value, true);
+      emit_insn (arm_call_generator (generator, load_temp, memory,
+				     required_value, new_value));
+      emit_move_insn (target, gen_lowpart (mode, load_temp));
+    }
+  else
+    {
+      emit_insn (arm_call_generator (generator, target, memory, required_value,
+				     new_value));
+    }
+}
+
 #include "gt-arm.h"
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 29f31f2..ec935da 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -126,6 +126,24 @@  enum target_cpus
 /* The processor for which instructions should be scheduled.  */
 extern enum processor_type arm_tune;
 
+enum arm_sync_generator_tag
+  {
+    arm_sync_generator_omn,
+    arm_sync_generator_omrn
+  };
+
+/* Wrapper to pass around a polymorphic pointer to a sync instruction
+   generator and.  */
+struct arm_sync_generator
+{
+  enum arm_sync_generator_tag op;
+  union
+  {
+    rtx (* omn) (rtx, rtx, rtx);
+    rtx (* omrn) (rtx, rtx, rtx, rtx);
+  } u;
+};
+
 typedef enum arm_cond_code
 {
   ARM_EQ = 0, ARM_NE, ARM_CS, ARM_CC, ARM_MI, ARM_PL, ARM_VS, ARM_VC,
@@ -270,6 +288,17 @@  extern void (*arm_lang_output_object_attributes_hook)(void);
    for Thumb-2.  */
 #define TARGET_UNIFIED_ASM TARGET_THUMB2
 
+/* Nonzero if this chip provides the DMB instruction.  */
+#define TARGET_HAVE_DMB		(arm_arch7)
+
+/* Nonzero if this chip implements a memory barrier via CP15.  */
+#define TARGET_HAVE_DMB_MCR	(arm_arch6k && ! TARGET_HAVE_DMB)
+
+/* Nonzero if this chip supports ldrex and strex */
+#define TARGET_HAVE_LDREX	((arm_arch6 && TARGET_ARM) || arm_arch7)
+
+/* Nonzero if this chip supports ldrex{bhd} and strex{bhd}.  */
+#define TARGET_HAVE_LDREXBHD	((arm_arch6k && TARGET_ARM) || arm_arch7)
 
 /* True iff the full BPABI is being used.  If TARGET_BPABI is true,
    then TARGET_AAPCS_BASED must be true -- but the converse does not
@@ -403,6 +432,12 @@  extern int arm_arch5e;
 /* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
 extern int arm_arch6;
 
+/* Nonzero if this chip supports the ARM Architecture 6k extensions.  */
+extern int arm_arch6k;
+
+/* Nonzero if this chip supports the ARM Architecture 7 extensions.  */
+extern int arm_arch7;
+
 /* Nonzero if instructions not present in the 'M' profile can be used.  */
 extern int arm_arch_notm;
 
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 7196532..0fc3328 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -103,6 +103,7 @@ 
    (UNSPEC_RBIT 26)       ; rbit operation.
    (UNSPEC_SYMBOL_OFFSET 27) ; The offset of the start of the symbol from
                              ; another symbolic address.
+   (UNSPEC_MEMORY_BARRIER 28) ; Represent a memory barrier.
   ]
 )
 
@@ -137,6 +138,11 @@ 
    (VUNSPEC_WCMP_GT  14) ; Used by the iwMMXT WCMPGT instructions
    (VUNSPEC_EH_RETURN 20); Use to override the return address for exception
 			 ; handling.
+   (VUNSPEC_SYNC_COMPARE_AND_SWAP 21)	; Represent an atomic compare swap.
+   (VUNSPEC_SYNC_LOCK             22)	; Represent a sync_lock_test_and_set.
+   (VUNSPEC_SYNC_OP               23)	; Represent a sync_<op>
+   (VUNSPEC_SYNC_NEW_OP           24)	; Represent a sync_new_<op>
+   (VUNSPEC_SYNC_OLD_OP           25)	; Represent a sync_old_<op>
   ]
 )
 
@@ -164,8 +170,21 @@ 
 (define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp"
   (const (symbol_ref "arm_fpu_attr")))
 
+(define_attr "sync_result"          "none,0,1,2,3,4,5" (const_string "none"))
+(define_attr "sync_memory"          "none,0,1,2,3,4,5" (const_string "none"))
+(define_attr "sync_required_value"  "none,0,1,2,3,4,5" (const_string "none"))
+(define_attr "sync_new_value"       "none,0,1,2,3,4,5" (const_string "none"))
+(define_attr "sync_t1"              "none,0,1,2,3,4,5" (const_string "none"))
+(define_attr "sync_t2"              "none,0,1,2,3,4,5" (const_string "none"))
+(define_attr "sync_release_barrier" "yes,no"           (const_string "yes"))
+(define_attr "sync_op"              "none,add,sub,ior,xor,and,nand"
+                                    (const_string "none"))
+
 ; LENGTH of an instruction (in bytes)
-(define_attr "length" "" (const_int 4))
+(define_attr "length" ""
+  (cond [(not (eq_attr "sync_memory" "none"))
+ 	   (symbol_ref "arm_sync_loop_insns (insn, operands) * 4")
+	] (const_int 4)))
 
 ; POOL_RANGE is how far away from a constant pool entry that this insn
 ; can be placed.  If the distance is zero, then this insn will never
@@ -10614,4 +10633,5 @@ 
 (include "thumb2.md")
 ;; Neon patterns
 (include "neon.md")
-
+;; Synchronization Primitives
+(include "sync.md")