diff mbox

[2/2] s390: Convert from sync to atomic optabs

Message ID 1343687574-3244-3-git-send-email-rth@redhat.com
State New
Headers show

Commit Message

Richard Henderson July 30, 2012, 10:32 p.m. UTC
Split out s390_two_part_insv from s390_expand_cs_hqi to try
harder to use bit insertion instructions in the CAS loop.
---
 gcc/config/s390/s390-protos.h |    3 +-
 gcc/config/s390/s390.c        |  141 ++++++++++-----
 gcc/config/s390/s390.md       |  401 +++++++++++++++++++++++++++++------------
 3 files changed, 383 insertions(+), 162 deletions(-)
diff mbox

Patch

diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index 4f1eb42..79673d6 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -85,7 +85,8 @@  extern void s390_expand_setmem (rtx, rtx, rtx);
 extern bool s390_expand_cmpmem (rtx, rtx, rtx, rtx);
 extern bool s390_expand_addcc (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
 extern bool s390_expand_insv (rtx, rtx, rtx, rtx);
-extern void s390_expand_cs_hqi (enum machine_mode, rtx, rtx, rtx, rtx);
+extern void s390_expand_cs_hqi (enum machine_mode, rtx, rtx, rtx,
+				rtx, rtx, bool);
 extern void s390_expand_atomic (enum machine_mode, enum rtx_code,
 				rtx, rtx, rtx, bool);
 extern rtx s390_return_addr_rtx (int, rtx);
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 240fb7e..1006281 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -896,10 +896,12 @@  s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
    conditional branch testing the result.  */
 
 static rtx
-s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem, rtx cmp, rtx new_rtx)
+s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
+			    rtx cmp, rtx new_rtx)
 {
-  emit_insn (gen_sync_compare_and_swapsi (old, mem, cmp, new_rtx));
-  return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM), const0_rtx);
+  emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
+  return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
+			    const0_rtx);
 }
 
 /* Emit a jump instruction to TARGET.  If COND is NULL_RTX, emit an
@@ -4754,80 +4756,123 @@  init_alignment_context (struct alignment_context *ac, rtx mem,
   ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask, NULL_RTX, 1);
 }
 
+/* A subroutine of s390_expand_cs_hqi.  Insert INS into VAL.  If possible,
+   use a single insv insn into SEQ2.  Otherwise, put prep insns in SEQ1 and
+   perform the merge in SEQ2.  */
+
+static rtx
+s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
+		    enum machine_mode mode, rtx val, rtx ins)
+{
+  rtx tmp;
+
+  if (ac->aligned)
+    {
+      start_sequence ();
+      tmp = copy_to_mode_reg (SImode, val);
+      if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
+			    const0_rtx, ins))
+	{
+	  *seq1 = NULL;
+	  *seq2 = get_insns ();
+	  end_sequence ();
+	  return tmp;
+	}
+      end_sequence ();
+    }
+
+  /* Failed to use insv.  Generate a two part shift and mask.  */
+  start_sequence ();
+  tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
+  *seq1 = get_insns ();
+  end_sequence ();
+
+  start_sequence ();
+  tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
+  *seq2 = get_insns ();
+  end_sequence ();
+
+  return tmp;
+}
+
 /* Expand an atomic compare and swap operation for HImode and QImode.  MEM is
-   the memory location, CMP the old value to compare MEM with and NEW_RTX the value
-   to set if CMP == MEM.
-   CMP is never in memory for compare_and_swap_cc because
-   expand_bool_compare_and_swap puts it into a register for later compare.  */
+   the memory location, CMP the old value to compare MEM with and NEW_RTX the
+   value to set if CMP == MEM.  */
 
 void
-s390_expand_cs_hqi (enum machine_mode mode, rtx target, rtx mem, rtx cmp, rtx new_rtx)
+s390_expand_cs_hqi (enum machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
+		    rtx cmp, rtx new_rtx, bool is_weak)
 {
   struct alignment_context ac;
-  rtx cmpv, newv, val, resv, cc;
+  rtx cmpv, newv, val, resv, cc, seq0, seq1, seq2, seq3;
   rtx res = gen_reg_rtx (SImode);
-  rtx csloop = gen_label_rtx ();
-  rtx csend = gen_label_rtx ();
+  rtx csloop = NULL, csend = NULL;
 
-  gcc_assert (register_operand (target, VOIDmode));
+  gcc_assert (register_operand (vtarget, VOIDmode));
   gcc_assert (MEM_P (mem));
 
   init_alignment_context (&ac, mem, mode);
 
-  /* Shift the values to the correct bit positions.  */
-  if (!(ac.aligned && MEM_P (cmp)))
-    cmp = s390_expand_mask_and_shift (cmp, mode, ac.shift);
-  if (!(ac.aligned && MEM_P (new_rtx)))
-    new_rtx = s390_expand_mask_and_shift (new_rtx, mode, ac.shift);
-
   /* Load full word.  Subsequent loads are performed by CS.  */
   val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
 			     NULL_RTX, 1, OPTAB_DIRECT);
 
+  /* Prepare insertions of cmp and new_rtx into the loaded value.  When
+     possible, we try to use insv to make this happen efficiently.  If
+     that fails we'll generate code both inside and outside the loop.  */
+  cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
+  newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
+
+  if (seq0)
+    emit_insn (seq0);
+  if (seq1)
+    emit_insn (seq1);
+
   /* Start CS loop.  */
-  emit_label (csloop);
+  if (!is_weak)
+    {
+      /* Begin assuming success.  */
+      emit_move_insn (btarget, const1_rtx);
+
+      csloop = gen_label_rtx ();
+      csend = gen_label_rtx ();
+      emit_label (csloop);
+    }
+
   /* val = "<mem>00..0<mem>"
    * cmp = "00..0<cmp>00..0"
    * new = "00..0<new>00..0"
    */
 
-  /* Patch cmp and new with val at correct position.  */
-  if (ac.aligned && MEM_P (cmp))
+  emit_insn (seq2);
+  emit_insn (seq3);
+
+  if (is_weak)
     {
-      cmpv = force_reg (SImode, val);
-      store_bit_field (cmpv, GET_MODE_BITSIZE (mode), 0,
-		       0, 0, SImode, cmp);
+      cc = s390_emit_compare_and_swap (NE, res, ac.memsi, cmpv, newv);
+      emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
     }
   else
-    cmpv = force_reg (SImode, expand_simple_binop (SImode, IOR, cmp, val,
-						   NULL_RTX, 1, OPTAB_DIRECT));
-  if (ac.aligned && MEM_P (new_rtx))
     {
-      newv = force_reg (SImode, val);
-      store_bit_field (newv, GET_MODE_BITSIZE (mode), 0,
-		       0, 0, SImode, new_rtx);
-    }
-  else
-    newv = force_reg (SImode, expand_simple_binop (SImode, IOR, new_rtx, val,
-						   NULL_RTX, 1, OPTAB_DIRECT));
-
-  /* Jump to end if we're done (likely?).  */
-  s390_emit_jump (csend, s390_emit_compare_and_swap (EQ, res, ac.memsi,
-						     cmpv, newv));
+      /* Jump to end if we're done (likely?).  */
+      cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
+      s390_emit_jump (csend, cc);
 
-  /* Check for changes outside mode.  */
-  resv = expand_simple_binop (SImode, AND, res, ac.modemaski,
-			      NULL_RTX, 1, OPTAB_DIRECT);
-  cc = s390_emit_compare (NE, resv, val);
-  emit_move_insn (val, resv);
-  /* Loop internal if so.  */
-  s390_emit_jump (csloop, cc);
+      /* Check for changes outside mode, and loop internally if so.  */
+      resv = expand_simple_binop (SImode, AND, res, ac.modemaski,
+			          NULL_RTX, 1, OPTAB_DIRECT);
+      cc = s390_emit_compare (NE, resv, val);
+      emit_move_insn (val, resv);
+      s390_emit_jump (csloop, cc);
 
-  emit_label (csend);
+      /* Failed.  */
+      emit_move_insn (btarget, const0_rtx);
+      emit_label (csend);
+    }
 
   /* Return the correct part of the bitfield.  */
-  convert_move (target, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
-					     NULL_RTX, 1, OPTAB_DIRECT), 1);
+  convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
+					      NULL_RTX, 1, OPTAB_DIRECT), 1);
 }
 
 /* Expand an atomic operation CODE of mode MODE.  MEM is the memory location
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 096f266..3314006 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -84,6 +84,7 @@ 
 
    ; Atomic Support
    UNSPEC_MB
+   UNSPEC_MOVA
 
    ; TLS relocation specifiers
    UNSPEC_TLSGD
@@ -349,21 +350,19 @@ 
 (define_mode_iterator DD_DF [DF DD])
 (define_mode_iterator TD_TF [TF TD])
 
-;; This mode iterator allows 31-bit and 64-bit TDSI patterns to be generated
-;; from the same template.
-(define_mode_iterator TDSI [(TI "TARGET_64BIT") DI SI])
-
 ;; These mode iterators allow 31-bit and 64-bit GPR patterns to be generated
 ;; from the same template.
 (define_mode_iterator GPR [(DI "TARGET_ZARCH") SI])
+(define_mode_iterator DGPR [(TI "TARGET_ZARCH") DI SI])
 (define_mode_iterator DSI [DI SI])
+(define_mode_iterator TDI [TI DI])
 
 ;; These mode iterators allow :P to be used for patterns that operate on
 ;; pointer-sized quantities.  Exactly one of the two alternatives will match.
 (define_mode_iterator P [(DI "TARGET_64BIT") (SI "!TARGET_64BIT")])
 
-;; These macros refer to the actual word_mode of the configuration. This is equal
-;; to Pmode except on 31-bit machines in zarch mode.
+;; These macros refer to the actual word_mode of the configuration.
+;; This is equal to Pmode except on 31-bit machines in zarch mode.
 (define_mode_iterator DW [(TI "TARGET_ZARCH") (DI "!TARGET_ZARCH")])
 (define_mode_iterator W  [(DI "TARGET_ZARCH") (SI "!TARGET_ZARCH")])
 
@@ -379,6 +378,7 @@ 
 ;; same template.
 (define_mode_iterator INT [(DI "TARGET_ZARCH") SI HI QI])
 (define_mode_iterator INTALL [TI DI SI HI QI])
+(define_mode_iterator DINT [(TI "TARGET_ZARCH") DI SI HI QI])
 
 ;; This iterator allows some 'ashift' and 'lshiftrt' pattern to be defined from
 ;; the same template.
@@ -487,6 +487,9 @@ 
 ;; and "cds" in DImode.
 (define_mode_attr tg [(TI "g") (DI "")])
 
+;; In TDI templates, a string like "c<d>sg".
+(define_mode_attr td [(TI "d") (DI "")])
+
 ;; In GPR templates, a string like "c<gf>dbr" will expand to "cgdbr" in DImode
 ;; and "cfdbr" in SImode.
 (define_mode_attr gf [(DI "g") (SI "f")])
@@ -8739,164 +8742,336 @@ 
 ;;
 
 ;
-; memory barrier pattern.
+; memory barrier patterns.
 ;
 
-(define_expand "memory_barrier"
-  [(set (match_dup 0)
-	(unspec:BLK [(match_dup 0)] UNSPEC_MB))]
+(define_expand "mem_signal_fence"
+  [(match_operand:SI 0 "const_int_operand")]		;; model
+  ""
+{
+  /* The s390 memory model is strong enough not to require any
+     barrier in order to synchronize a thread with itself.  */
+  DONE;
+})
+
+(define_expand "mem_thread_fence"
+  [(match_operand:SI 0 "const_int_operand")]		;; model
   ""
 {
-  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
-  MEM_VOLATILE_P (operands[0]) = 1;
+  /* Unless this is a SEQ_CST fence, the s390 memory model is strong
+     enough not to require barriers of any kind.  */
+  if (INTVAL (operands[0]) == MEMMODEL_SEQ_CST)
+    {
+      rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+      MEM_VOLATILE_P (mem) = 1;
+      emit_insn (gen_mem_thread_fence_1 (mem));
+    }
+  DONE;
 })
 
-(define_insn "*memory_barrier"
+; Although bcr is superscalar on Z10, this variant will never
+; become part of an execution group.
+(define_insn "mem_thread_fence_1"
   [(set (match_operand:BLK 0 "" "")
 	(unspec:BLK [(match_dup 0)] UNSPEC_MB))]
   ""
   "bcr\t15,0"
   [(set_attr "op_type" "RR")])
 
-; Although bcr is superscalar on Z10, this variant will never become part of
-; an execution group.
+;
+; atomic load/store operations
+;
+
+; Atomic loads need not examine the memory model at all.
+(define_expand "atomic_load<mode>"
+  [(match_operand:DINT 0 "register_operand")	;; output
+   (match_operand:DINT 1 "memory_operand")	;; memory
+   (match_operand:SI 2 "const_int_operand")]	;; model
+  ""
+{
+  if (<MODE>mode == TImode)
+    emit_insn (gen_atomic_loadti_1 (operands[0], operands[1]));
+  else if (<MODE>mode == DImode && !TARGET_ZARCH)
+    {
+      if (!s_operand (operands[1], VOIDmode))
+	{
+	  rtx a = copy_to_reg (XEXP (operands[1], 0));
+	  operands[1] = replace_equiv_address (operands[1], a);
+        }
+      emit_insn (gen_atomic_loaddi_1 (operands[0], operands[1]));
+    }
+  else
+    emit_move_insn (operands[0], operands[1]);
+  DONE;
+})
+
+; Different from movdi_31 in that we have no splitters.
+(define_insn "atomic_loaddi_1"
+  [(set (match_operand:DI 0 "register_operand" "=d,d,!*f,!*f")
+	(unspec:DI [(match_operand:DI 1 "s_operand" "Q,S,Q,m")]
+		   UNSPEC_MOVA))]
+  "!TARGET_ZARCH"
+  "@
+   lm\t%0,%M0,%S1
+   lmy\t%0,%M0,%S1
+   ld\t%0,%1
+   ldy\t%0,%1"
+  [(set_attr "op_type" "RS,RSY,RS,RSY")
+   (set_attr "type" "lm,lm,floaddf,floaddf")])
+
+(define_insn "atomic_loadti_1"
+  [(set (match_operand:TI 0 "register_operand" "=r")
+	(unspec:TI [(match_operand:TI 1 "memory_operand" "m")]
+		   UNSPEC_MOVA))]
+  "TARGET_ZARCH"
+  "lpq\t%0,%1"
+  [(set_attr "op_type" "RXY")])
+
+; Atomic stores must(?) enforce sequential consistency.
+(define_expand "atomic_store<mode>"
+  [(match_operand:DINT 0 "memory_operand")	;; memory
+   (match_operand:DINT 1 "register_operand")	;; input
+   (match_operand:SI 2 "const_int_operand")]	;; model
+  ""
+{
+  enum memmodel model = (enum memmodel) INTVAL (operands[2]);
+
+  if (<MODE>mode == TImode)
+    emit_insn (gen_atomic_storeti_1 (operands[0], operands[1]));
+  else if (<MODE>mode == DImode && !TARGET_ZARCH)
+    {
+      if (!s_operand (operands[0], VOIDmode))
+	{
+	  rtx a = copy_to_reg (XEXP (operands[0], 0));
+	  operands[0] = replace_equiv_address (operands[0], a);
+        }
+      emit_insn (gen_atomic_storedi_1 (operands[0], operands[1]));
+    }
+  else
+    emit_move_insn (operands[0], operands[1]);
+  if (model == MEMMODEL_SEQ_CST)
+    emit_insn (gen_mem_thread_fence (operands[2]));
+  DONE;
+})
+
+; Different from movdi_31 in that we have no splitters.
+(define_insn "atomic_storedi_1"
+  [(set (match_operand:DI 0 "s_operand" "=Q,S,Q,m")
+	(unspec:DI [(match_operand:DI 1 "register_operand" "d,d,!*f,!*f")]
+		   UNSPEC_MOVA))]
+  "!TARGET_ZARCH"
+  "@
+   stm\t%1,%N1,%S0
+   stmy\t%1,%N1,%S0
+   std\t%1,%0
+   stdy\t%1,%0
+  [(set_attr "op_type" "RS,RSY,RS,RSY")
+   (set_attr "type" "stm,stm,fstoredf,fstoredf")])
+
+(define_insn "atomic_storeti_1"
+  [(set (match_operand:TI 0 "memory_operand" "=m")
+	(unspec:TI [(match_operand:TI 1 "register_operand" "r")]
+		   UNSPEC_MOVA))]
+  "TARGET_ZARCH"
+  "stpq\t%1,%0"
+  [(set_attr "op_type" "RXY")])
 
 ;
 ; compare and swap patterns.
 ;
 
-(define_expand "sync_compare_and_swap<mode>"
-  [(parallel
-    [(set (match_operand:TDSI 0 "register_operand" "")
-	  (match_operand:TDSI 1 "memory_operand" ""))
-     (set (match_dup 1)
-	  (unspec_volatile:TDSI
-	    [(match_dup 1)
-	     (match_operand:TDSI 2 "register_operand" "")
-	     (match_operand:TDSI 3 "register_operand" "")]
-	    UNSPECV_CAS))
-     (set (reg:CCZ1 CC_REGNUM)
-	  (compare:CCZ1 (match_dup 1) (match_dup 2)))])]
-  "")
+(define_expand "atomic_compare_and_swap<mode>"
+  [(match_operand:SI 0 "register_operand")	;; bool success output
+   (match_operand:DGPR 1 "register_operand")	;; oldval output
+   (match_operand:DGPR 2 "s_operand")		;; memory
+   (match_operand:DGPR 3 "register_operand")	;; expected input
+   (match_operand:DGPR 4 "register_operand")	;; newval input
+   (match_operand:SI 5 "const_int_operand")	;; is_weak
+   (match_operand:SI 6 "const_int_operand")	;; success model
+   (match_operand:SI 7 "const_int_operand")]	;; failure model
+  ""
+{
+  rtx cc, cmp;
+  emit_insn (gen_atomic_compare_and_swap<mode>_internal
+	     (operands[1], operands[2], operands[3], operands[4]));
+  cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
+  cmp = gen_rtx_NE (SImode, cc, const0_rtx);
+  emit_insn (gen_cstorecc4 (operands[0], cmp, cc, const0_rtx));
+  DONE;
+})
 
-(define_expand "sync_compare_and_swap<mode>"
-  [(parallel
-    [(set (match_operand:HQI 0 "register_operand" "")
-	  (match_operand:HQI 1 "memory_operand" ""))
-     (set (match_dup 1)
-	  (unspec_volatile:HQI
-	    [(match_dup 1)
-	     (match_operand:HQI 2 "general_operand" "")
-	     (match_operand:HQI 3 "general_operand" "")]
-	    UNSPECV_CAS))
-     (clobber (reg:CC CC_REGNUM))])]
+(define_expand "atomic_compare_and_swap<mode>"
+  [(match_operand:SI 0 "register_operand")	;; bool success output
+   (match_operand:HQI 1 "register_operand")	;; oldval output
+   (match_operand:HQI 2 "s_operand")		;; memory
+   (match_operand:HQI 3 "general_operand")	;; expected input
+   (match_operand:HQI 4 "general_operand")	;; newval input
+   (match_operand:SI 5 "const_int_operand")	;; is_weak
+   (match_operand:SI 6 "const_int_operand")	;; success model
+   (match_operand:SI 7 "const_int_operand")]	;; failure model
   ""
-  "s390_expand_cs_hqi (<MODE>mode, operands[0], operands[1],
-		       operands[2], operands[3]); DONE;")
+{
+  s390_expand_cs_hqi (<MODE>mode, operands[0], operands[1], operands[2],
+		      operands[3], operands[4], INTVAL (operands[5]));
+  DONE;
+})
 
-; cds, cdsg
-(define_insn "*sync_compare_and_swap<mode>"
-  [(set (match_operand:DW 0 "register_operand" "=r")
-	(match_operand:DW 1 "memory_operand" "+Q"))
+(define_expand "atomic_compare_and_swap<mode>_internal"
+  [(parallel
+     [(set (match_operand:DGPR 0 "register_operand")
+	   (match_operand:DGPR 1 "s_operand"))
+      (set (match_dup 1)
+	   (unspec_volatile:DGPR
+	     [(match_dup 1)
+	      (match_operand:DGPR 2 "register_operand")
+	      (match_operand:DGPR 3 "register_operand")]
+	     UNSPECV_CAS))
+      (set (reg:CCZ1 CC_REGNUM)
+	   (compare:CCZ1 (match_dup 1) (match_dup 2)))])]
+  "")
+
+; cdsg, csg
+(define_insn "*atomic_compare_and_swap<mode>_1"
+  [(set (match_operand:TDI 0 "register_operand" "=r")
+	(match_operand:TDI 1 "s_operand" "+QS"))
    (set (match_dup 1)
-	(unspec_volatile:DW
+	(unspec_volatile:TDI
 	  [(match_dup 1)
-	   (match_operand:DW 2 "register_operand" "0")
-	   (match_operand:DW 3 "register_operand" "r")]
+	   (match_operand:TDI 2 "register_operand" "0")
+	   (match_operand:TDI 3 "register_operand" "r")]
 	  UNSPECV_CAS))
    (set (reg:CCZ1 CC_REGNUM)
 	(compare:CCZ1 (match_dup 1) (match_dup 2)))]
-  ""
-  "cds<tg>\t%0,%3,%S1"
-  [(set_attr "op_type" "RS<TE>")
+  "TARGET_ZARCH"
+  "c<td>sg\t%0,%3,%S1"
+  [(set_attr "op_type" "RSY")
    (set_attr "type"   "sem")])
 
-; cs, csg
-(define_insn "*sync_compare_and_swap<mode>"
-  [(set (match_operand:GPR 0 "register_operand" "=r")
-	(match_operand:GPR 1 "memory_operand" "+Q"))
+; cds, cdsy
+(define_insn "*atomic_compare_and_swapdi_2"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(match_operand:DI 1 "s_operand" "+Q,S"))
    (set (match_dup 1)
-	(unspec_volatile:GPR
+	(unspec_volatile:DI
 	  [(match_dup 1)
-	   (match_operand:GPR 2 "register_operand" "0")
-	   (match_operand:GPR 3 "register_operand" "r")]
+	   (match_operand:DI 2 "register_operand" "0,0")
+	   (match_operand:DI 3 "register_operand" "r,r")]
+	  UNSPECV_CAS))
+   (set (reg:CCZ1 CC_REGNUM)
+	(compare:CCZ1 (match_dup 1) (match_dup 2)))]
+  "!TARGET_ZARCH"
+  "@
+   cds\t%0,%3,%S1
+   cdsy\t%0,%3,%S1"
+  [(set_attr "op_type" "RS,RSY")
+   (set_attr "type" "sem")])
+
+; cs, csy
+(define_insn "*atomic_compare_and_swapsi_3"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+	(match_operand:SI 1 "s_operand" "+Q,S"))
+   (set (match_dup 1)
+	(unspec_volatile:SI
+	  [(match_dup 1)
+	   (match_operand:SI 2 "register_operand" "0,0")
+	   (match_operand:SI 3 "register_operand" "r,r")]
 	  UNSPECV_CAS))
    (set (reg:CCZ1 CC_REGNUM)
 	(compare:CCZ1 (match_dup 1) (match_dup 2)))]
   ""
-  "cs<g>\t%0,%3,%S1"
-  [(set_attr "op_type" "RS<E>")
+  "@
+   cs\t%0,%3,%S1
+   csy\t%0,%3,%S1"
+  [(set_attr "op_type" "RS,RSY")
    (set_attr "type"   "sem")])
 
-
 ;
 ; Other atomic instruction patterns.
 ;
 
-(define_expand "sync_lock_test_and_set<mode>"
-  [(match_operand:HQI 0 "register_operand")
-   (match_operand:HQI 1 "memory_operand")
-   (match_operand:HQI 2 "general_operand")]
-  ""
-  "s390_expand_atomic (<MODE>mode, SET, operands[0], operands[1],
-		       operands[2], false); DONE;")
-
 ; z196 load and add, xor, or and and instructions
 
-; lan, lang, lao, laog, lax, laxg, laa, laag
-(define_insn "sync_<atomic><mode>"
-  [(parallel
-    [(set (match_operand:GPR 0 "memory_operand" "+QS")
-	  (unspec_volatile:GPR
-	   [(ATOMIC_Z196:GPR (match_dup 0)
-			     (match_operand:GPR 1 "general_operand" "d"))]
-	   UNSPECV_ATOMIC_OP))
-     (clobber (match_scratch:GPR 2 "=d"))
-     (clobber (reg:CC CC_REGNUM))])]
+(define_expand "atomic_fetch_<atomic><mode>"
+  [(match_operand:GPR 0 "register_operand")		;; val out
+   (ATOMIC_Z196:GPR
+     (match_operand:GPR 1 "s_operand")			;; memory
+     (match_operand:GPR 2 "register_operand"))		;; val in
+   (match_operand:SI 3 "const_int_operand")]		;; model
   "TARGET_Z196"
-  "la<noxa><g>\t%2,%1,%0")
+{
+  emit_insn (gen_atomic_fetch_<atomic><mode>_iaf
+	     (operands[0], operands[1], operands[2]));
+  DONE;
+})
 
 ; lan, lang, lao, laog, lax, laxg, laa, laag
-(define_insn "sync_old_<atomic><mode>"
-  [(parallel
-    [(set (match_operand:GPR 0 "register_operand" "=d")
-	  (match_operand:GPR 1 "memory_operand"   "+QS"))
-     (set (match_dup 1)
-	  (unspec_volatile:GPR
-	   [(ATOMIC_Z196:GPR (match_dup 1)
-			     (match_operand:GPR 2 "general_operand" "d"))]
-	   UNSPECV_ATOMIC_OP))
-     (clobber (reg:CC CC_REGNUM))])]
+(define_insn "atomic_fetch_<atomic><mode>_iaf"
+  [(set (match_operand:GPR 0 "register_operand" "=d")
+	(match_operand:GPR 1 "s_operand" "+S"))
+   (set (match_dup 1)
+	(unspec_volatile:GPR
+	 [(ATOMIC_Z196:GPR (match_dup 1)
+			   (match_operand:GPR 2 "general_operand" "d"))]
+	 UNSPECV_ATOMIC_OP))
+   (clobber (reg:CC CC_REGNUM))]
   "TARGET_Z196"
-  "la<noxa><g>\t%0,%2,%1")
+  "la<noxa><g>\t%0,%2,%1"
+  [(set_attr "op_type" "RSY")
+   (set_attr "type" "sem")])
 
+;; For SImode and larger, the optabs.c code will do just fine in
+;; expanding a compare-and-swap loop.  For QI/HImode, we can do
+;; better by expanding our own loop.
 
-(define_expand "sync_<atomic><mode>"
-  [(set (match_operand:HQI 0 "memory_operand")
-	(ATOMIC:HQI (match_dup 0)
-		    (match_operand:HQI 1 "general_operand")))]
+(define_expand "atomic_<atomic><mode>"
+  [(ATOMIC:HQI
+     (match_operand:HQI 0 "s_operand")			;; memory
+     (match_operand:HQI 1 "general_operand"))		;; val in
+   (match_operand:SI 2 "const_int_operand")]		;; model
   ""
-  "s390_expand_atomic (<MODE>mode, <CODE>, NULL_RTX, operands[0],
-		       operands[1], false); DONE;")
+{
+  s390_expand_atomic (<MODE>mode, <CODE>, NULL_RTX, operands[0],
+		       operands[1], false);
+  DONE;
+})
 
-(define_expand "sync_old_<atomic><mode>"
-  [(set (match_operand:HQI 0 "register_operand")
-	(match_operand:HQI 1 "memory_operand"))
-   (set (match_dup 1)
-	(ATOMIC:HQI (match_dup 1)
-		    (match_operand:HQI 2 "general_operand")))]
+(define_expand "atomic_fetch_<atomic><mode>"
+  [(match_operand:HQI 0 "register_operand")		;; val out
+   (ATOMIC:HQI
+     (match_operand:HQI 1 "s_operand")			;; memory
+     (match_operand:HQI 2 "general_operand"))		;; val in
+   (match_operand:SI 3 "const_int_operand")]		;; model
   ""
-  "s390_expand_atomic (<MODE>mode, <CODE>, operands[0], operands[1],
-		       operands[2], false); DONE;")
-
-(define_expand "sync_new_<atomic><mode>"
-  [(set (match_operand:HQI 0 "register_operand")
-	(ATOMIC:HQI (match_operand:HQI 1 "memory_operand")
-		    (match_operand:HQI 2 "general_operand")))
-   (set (match_dup 1) (ATOMIC:HQI (match_dup 1) (match_dup 2)))]
+{
+  s390_expand_atomic (<MODE>mode, <CODE>, operands[0], operands[1],
+		      operands[2], false);
+  DONE;
+})
+
+(define_expand "atomic_<atomic>_fetch<mode>"
+  [(match_operand:HQI 0 "register_operand")		;; val out
+   (ATOMIC:HQI
+     (match_operand:HQI 1 "s_operand")			;; memory
+     (match_operand:HQI 2 "general_operand"))		;; val in
+   (match_operand:SI 3 "const_int_operand")]		;; model
   ""
-  "s390_expand_atomic (<MODE>mode, <CODE>, operands[0], operands[1],
-		       operands[2], true); DONE;")
+{
+  s390_expand_atomic (<MODE>mode, <CODE>, operands[0], operands[1],
+		      operands[2], true);
+  DONE;
+})
+
+(define_expand "atomic_exchange<mode>"
+  [(match_operand:HQI 0 "register_operand")		;; val out
+   (match_operand:HQI 1 "s_operand")			;; memory
+   (match_operand:HQI 2 "general_operand")		;; val in
+   (match_operand:SI 3 "const_int_operand")]		;; model
+  ""
+{
+  s390_expand_atomic (<MODE>mode, SET, operands[0], operands[1],
+		      operands[2], false);
+  DONE;
+})
 
 ;;
 ;;- Miscellaneous instructions.