===================================================================
@@ -17748,7 +17748,8 @@ emit_unlikely_jump (rtx cond, rtx label)
}
/* A subroutine of the atomic operation splitters. Emit a load-locked
- instruction in MODE. */
+   instruction in MODE.  For QI/HImode, possibly use a pattern that includes
+ the zero_extend operation. */
static void
emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
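On power8, the QI/HImode load-locked patterns can zero-extend directly into an SImode register, so the compare loop can work on a full word with no separate extend. A minimal C analogue of that semantics, under an illustrative name (the real pattern also establishes the reservation, which plain C cannot express):

/* Hedged sketch: what load_lockedqi_si computes, minus the reservation.
   load_locked_byte_zext is illustrative, not a GCC internal.  */
static inline unsigned int
load_locked_byte_zext (const unsigned char *mem)
{
  return (unsigned int) *mem;	/* lbarx loads the byte and zero-extends */
}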
@@ -17757,12 +17758,26 @@ emit_load_locked (enum machine_mode mode
switch (mode)
{
+ case QImode:
+ fn = gen_load_lockedqi;
+ break;
+ case HImode:
+ fn = gen_load_lockedhi;
+ break;
case SImode:
- fn = gen_load_lockedsi;
+ if (GET_MODE (mem) == QImode)
+ fn = gen_load_lockedqi_si;
+ else if (GET_MODE (mem) == HImode)
+ fn = gen_load_lockedhi_si;
+ else
+ fn = gen_load_lockedsi;
break;
case DImode:
fn = gen_load_lockeddi;
break;
+ case TImode:
+ fn = gen_load_lockedti;
+ break;
default:
gcc_unreachable ();
}
@@ -17779,12 +17794,21 @@ emit_store_conditional (enum machine_mod
switch (mode)
{
+ case QImode:
+ fn = gen_store_conditionalqi;
+ break;
+ case HImode:
+ fn = gen_store_conditionalhi;
+ break;
case SImode:
fn = gen_store_conditionalsi;
break;
case DImode:
fn = gen_store_conditionaldi;
break;
+ case TImode:
+ fn = gen_store_conditionalti;
+ break;
default:
gcc_unreachable ();
}
@@ -17931,7 +17955,7 @@ rs6000_expand_atomic_compare_and_swap (r
{
rtx boolval, retval, mem, oldval, newval, cond;
rtx label1, label2, x, mask, shift;
- enum machine_mode mode;
+ enum machine_mode mode, orig_mode;
enum memmodel mod_s, mod_f;
bool is_weak;
@@ -17943,22 +17967,29 @@ rs6000_expand_atomic_compare_and_swap (r
is_weak = (INTVAL (operands[5]) != 0);
mod_s = (enum memmodel) INTVAL (operands[6]);
mod_f = (enum memmodel) INTVAL (operands[7]);
- mode = GET_MODE (mem);
+ orig_mode = mode = GET_MODE (mem);
mask = shift = NULL_RTX;
if (mode == QImode || mode == HImode)
{
- mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
-
- /* Shift and mask OLDVAL into position with the word. */
+      /* Before power8, we didn't have access to lbarx/lharx, so generate a
+	 lwarx and use shift/mask operations.  With power8, we need to do the
+ comparison in SImode, but the store is still done in QI/HImode. */
oldval = convert_modes (SImode, mode, oldval, 1);
- oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
- /* Shift and mask NEWVAL into position within the word. */
- newval = convert_modes (SImode, mode, newval, 1);
- newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ if (!TARGET_SYNC_HI_QI)
+ {
+ mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
+
+	  /* Shift and mask OLDVAL into position within the word.  */
+ oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
+
+ /* Shift and mask NEWVAL into position within the word. */
+ newval = convert_modes (SImode, mode, newval, 1);
+ newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ }
/* Prepare to adjust the return value. */
retval = gen_reg_rtx (SImode);
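In the pre-power8 path, OLDVAL and NEWVAL are positioned inside the containing aligned word before the loop runs. A hedged C analogue of the insertion the shift/mask pair performs, assuming MASK is already shifted into position as rs6000_adjust_atomic_subword produces it:

/* Sketch only: combine a shifted subword value into the surrounding word.  */
static unsigned int
insert_subword (unsigned int word, unsigned int val_shifted, unsigned int mask)
{
  return (word & ~mask) | (val_shifted & mask);
}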
@@ -17987,7 +18018,25 @@ rs6000_expand_atomic_compare_and_swap (r
}
cond = gen_reg_rtx (CCmode);
- x = gen_rtx_COMPARE (CCmode, x, oldval);
+ /* If we have TImode, synthesize a comparison. */
+ if (mode != TImode)
+ x = gen_rtx_COMPARE (CCmode, x, oldval);
+ else
+ {
+ rtx xor1_result = gen_reg_rtx (DImode);
+ rtx xor2_result = gen_reg_rtx (DImode);
+ rtx or_result = gen_reg_rtx (DImode);
+ rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
+ rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
+ rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
+ rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
+
+ emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
+ emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
+ emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
+ x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
+ }
+
emit_insn (gen_rtx_SET (VOIDmode, cond, x));
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
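The synthesized TImode equality test reduces to two 64-bit XORs ORed together and compared against zero; in plain C the same predicate is:

/* C analogue of the synthesized 128-bit comparison (illustrative only).  */
static int
ti_equal (unsigned long long new0, unsigned long long new1,
	  unsigned long long old0, unsigned long long old1)
{
  return ((new0 ^ old0) | (new1 ^ old1)) == 0;
}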
@@ -17997,7 +18046,7 @@ rs6000_expand_atomic_compare_and_swap (r
if (mask)
x = rs6000_mask_atomic_subword (retval, newval, mask);
- emit_store_conditional (mode, cond, mem, x);
+ emit_store_conditional (orig_mode, cond, mem, x);
if (!is_weak)
{
@@ -18015,6 +18064,8 @@ rs6000_expand_atomic_compare_and_swap (r
if (shift)
rs6000_finish_atomic_subword (operands[1], retval, shift);
+ else if (mode != GET_MODE (operands[1]))
+ convert_move (operands[1], retval, 1);
/* In all cases, CR0 contains EQ on success, and NE on failure. */
x = gen_rtx_EQ (SImode, cond, const0_rtx);
@@ -18038,7 +18089,7 @@ rs6000_expand_atomic_exchange (rtx opera
mode = GET_MODE (mem);
mask = shift = NULL_RTX;
- if (mode == QImode || mode == HImode)
+ if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
{
mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
@@ -18087,53 +18138,70 @@ rs6000_expand_atomic_op (enum rtx_code c
{
enum memmodel model = (enum memmodel) INTVAL (model_rtx);
enum machine_mode mode = GET_MODE (mem);
+ enum machine_mode store_mode = mode;
rtx label, x, cond, mask, shift;
rtx before = orig_before, after = orig_after;
mask = shift = NULL_RTX;
+  /* On power8, we want to use SImode for the operation.  On previous systems,
+     do the operation on the containing word and use shift/mask to get the
+     proper byte or halfword.  */
if (mode == QImode || mode == HImode)
{
- mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
-
- /* Shift and mask VAL into position with the word. */
- val = convert_modes (SImode, mode, val, 1);
- val = expand_simple_binop (SImode, ASHIFT, val, shift,
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ if (TARGET_SYNC_HI_QI)
+ {
+ val = convert_modes (SImode, mode, val, 1);
- switch (code)
+ /* Prepare to adjust the return value. */
+ before = gen_reg_rtx (SImode);
+ if (after)
+ after = gen_reg_rtx (SImode);
+ mode = SImode;
+ }
+ else
{
- case IOR:
- case XOR:
- /* We've already zero-extended VAL. That is sufficient to
- make certain that it does not affect other bits. */
- mask = NULL;
- break;
+ mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
- case AND:
- /* If we make certain that all of the other bits in VAL are
- set, that will be sufficient to not affect other bits. */
- x = gen_rtx_NOT (SImode, mask);
- x = gen_rtx_IOR (SImode, x, val);
- emit_insn (gen_rtx_SET (VOIDmode, val, x));
- mask = NULL;
- break;
+	  /* Shift and mask VAL into position within the word.  */
+ val = convert_modes (SImode, mode, val, 1);
+ val = expand_simple_binop (SImode, ASHIFT, val, shift,
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
- case NOT:
- case PLUS:
- case MINUS:
- /* These will all affect bits outside the field and need
- adjustment via MASK within the loop. */
- break;
+ switch (code)
+ {
+ case IOR:
+ case XOR:
+ /* We've already zero-extended VAL. That is sufficient to
+ make certain that it does not affect other bits. */
+ mask = NULL;
+ break;
- default:
- gcc_unreachable ();
- }
+ case AND:
+ /* If we make certain that all of the other bits in VAL are
+ set, that will be sufficient to not affect other bits. */
+ x = gen_rtx_NOT (SImode, mask);
+ x = gen_rtx_IOR (SImode, x, val);
+ emit_insn (gen_rtx_SET (VOIDmode, val, x));
+ mask = NULL;
+ break;
- /* Prepare to adjust the return value. */
- before = gen_reg_rtx (SImode);
- if (after)
- after = gen_reg_rtx (SImode);
- mode = SImode;
+ case NOT:
+ case PLUS:
+ case MINUS:
+ /* These will all affect bits outside the field and need
+ adjustment via MASK within the loop. */
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Prepare to adjust the return value. */
+ before = gen_reg_rtx (SImode);
+ if (after)
+ after = gen_reg_rtx (SImode);
+ store_mode = mode = SImode;
+ }
}
mem = rs6000_pre_atomic_barrier (mem, model);
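The AND adjustment in the pre-power8 branch works by forcing every bit of VAL outside the field to 1, so the atomic AND cannot disturb the neighbouring subwords. A hedged one-line analogue:

/* Sketch of the AND preparation: bits outside MASK become 1, so
   'word & prepared' leaves the other subwords of the word intact.  */
static unsigned int
prepare_and_operand (unsigned int val_shifted, unsigned int mask)
{
  return val_shifted | ~mask;
}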
@@ -18166,9 +18234,11 @@ rs6000_expand_atomic_op (enum rtx_code c
NULL_RTX, 1, OPTAB_LIB_WIDEN);
x = rs6000_mask_atomic_subword (before, x, mask);
}
+ else if (store_mode != mode)
+ x = convert_modes (store_mode, mode, x, 1);
cond = gen_reg_rtx (CCmode);
- emit_store_conditional (mode, cond, mem, x);
+ emit_store_conditional (store_mode, cond, mem, x);
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
emit_unlikely_jump (x, label);
@@ -18177,11 +18247,22 @@ rs6000_expand_atomic_op (enum rtx_code c
if (shift)
{
+ /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
+	 then do the calculations in a SImode register.  */
if (orig_before)
rs6000_finish_atomic_subword (orig_before, before, shift);
if (orig_after)
rs6000_finish_atomic_subword (orig_after, after, shift);
}
+ else if (store_mode != mode)
+ {
+ /* QImode/HImode on machines with lbarx/lharx where we do the native
+	 operation and then do the calculations in a SImode register.  */
+ if (orig_before)
+ convert_move (orig_before, before, 1);
+ if (orig_after)
+ convert_move (orig_after, after, 1);
+ }
else if (orig_after && after != orig_after)
emit_move_insn (orig_after, after);
}
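After the loop, rs6000_finish_atomic_subword recovers the byte or halfword from its position within the word; roughly, and assuming the shift counts bits from the low end as set up earlier:

/* Hedged analogue of extracting the subword result after the loop.  */
static unsigned char
finish_subword_byte (unsigned int word, unsigned int shift)
{
  return (unsigned char) (word >> shift);
}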
===================================================================
@@ -18,14 +18,23 @@
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
-(define_mode_attr larx [(SI "lwarx") (DI "ldarx")])
-(define_mode_attr stcx [(SI "stwcx.") (DI "stdcx.")])
+(define_mode_attr larx [(QI "lbarx")
+ (HI "lharx")
+ (SI "lwarx")
+ (DI "ldarx")
+ (TI "lqarx")])
+
+(define_mode_attr stcx [(QI "stbcx.")
+ (HI "sthcx.")
+ (SI "stwcx.")
+ (DI "stdcx.")
+ (TI "stqcx.")])
(define_code_iterator FETCHOP [plus minus ior xor and])
(define_code_attr fetchop_name
[(plus "add") (minus "sub") (ior "or") (xor "xor") (and "and")])
(define_code_attr fetchop_pred
- [(plus "add_operand") (minus "gpc_reg_operand")
+ [(plus "add_operand") (minus "int_reg_operand")
(ior "logical_operand") (xor "logical_operand") (and "and_operand")])
(define_expand "mem_thread_fence"
@@ -129,16 +138,7 @@ (define_expand "atomic_load<mode>"
case MEMMODEL_CONSUME:
case MEMMODEL_ACQUIRE:
case MEMMODEL_SEQ_CST:
- if (GET_MODE (operands[0]) == QImode)
- emit_insn (gen_loadsync_qi (operands[0]));
- else if (GET_MODE (operands[0]) == HImode)
- emit_insn (gen_loadsync_hi (operands[0]));
- else if (GET_MODE (operands[0]) == SImode)
- emit_insn (gen_loadsync_si (operands[0]));
- else if (GET_MODE (operands[0]) == DImode)
- emit_insn (gen_loadsync_di (operands[0]));
- else
- gcc_unreachable ();
+ emit_insn (gen_loadsync_<mode> (operands[0]));
break;
default:
gcc_unreachable ();
@@ -170,35 +170,109 @@ (define_expand "atomic_store<mode>"
DONE;
})
-;; ??? Power ISA 2.06B says that there *is* a load-{byte,half}-and-reserve
-;; opcode that is "phased-in". Not implemented as of Power7, so not yet used,
-;; but let's prepare the macros anyway.
-
-(define_mode_iterator ATOMIC [SI (DI "TARGET_POWERPC64")])
+;; Any supported integer mode that has atomic l<x>arx/st<x>cx. instructions
+;; other than the quad memory operations, which have special restrictions.
+;; Byte/halfword atomic instructions were added in ISA 2.06B, but were phased
+;; in and did not show up until power8. TImode atomic lqarx/stqcx. require
+;; special handling due to even/odd register requirements.
+(define_mode_iterator ATOMIC [(QI "TARGET_SYNC_HI_QI")
+ (HI "TARGET_SYNC_HI_QI")
+ SI
+ (DI "TARGET_POWERPC64")])
+
+;; Types that we should provide atomic instructions for.
+
+(define_mode_iterator AINT [QI
+ HI
+ SI
+ (DI "TARGET_POWERPC64")
+ (TI "TARGET_SYNC_TI")])
(define_insn "load_locked<mode>"
- [(set (match_operand:ATOMIC 0 "gpc_reg_operand" "=r")
+ [(set (match_operand:ATOMIC 0 "int_reg_operand" "=r")
(unspec_volatile:ATOMIC
[(match_operand:ATOMIC 1 "memory_operand" "Z")] UNSPECV_LL))]
""
"<larx> %0,%y1"
[(set_attr "type" "load_l")])
+(define_insn "load_locked<QHI:mode>_si"
+ [(set (match_operand:SI 0 "int_reg_operand" "=r")
+ (unspec_volatile:SI
+ [(match_operand:QHI 1 "memory_operand" "Z")] UNSPECV_LL))]
+ "TARGET_SYNC_HI_QI"
+ "<QHI:larx> %0,%y1"
+ [(set_attr "type" "load_l")])
+
+;; Use PTImode to get even/odd register pairs
+(define_expand "load_lockedti"
+ [(use (match_operand:TI 0 "quad_int_reg_operand" ""))
+ (use (match_operand:TI 1 "memory_operand" ""))]
+ "TARGET_SYNC_TI"
+{
+ /* Use a temporary register to force getting an even register for the
+ lqarx/stqcrx. instructions. Normal optimizations will eliminate this
+     lqarx/stqcx. instructions.  Normal optimizations will eliminate this
+ rtx pti = gen_reg_rtx (PTImode);
+ emit_insn (gen_load_lockedpti (pti, operands[1]));
+ emit_move_insn (operands[0], gen_lowpart (TImode, pti));
+ DONE;
+})
+
+(define_insn "load_lockedpti"
+ [(set (match_operand:PTI 0 "quad_int_reg_operand" "=&r")
+ (unspec_volatile:PTI
+ [(match_operand:TI 1 "memory_operand" "Z")] UNSPECV_LL))]
+ "TARGET_SYNC_TI
+ && !reg_mentioned_p (operands[0], operands[1])
+ && quad_int_reg_operand (operands[0], PTImode)"
+ "lqarx %0,%y1"
+ [(set_attr "type" "load_l")])
+
(define_insn "store_conditional<mode>"
[(set (match_operand:CC 0 "cc_reg_operand" "=x")
(unspec_volatile:CC [(const_int 0)] UNSPECV_SC))
(set (match_operand:ATOMIC 1 "memory_operand" "=Z")
- (match_operand:ATOMIC 2 "gpc_reg_operand" "r"))]
+ (match_operand:ATOMIC 2 "int_reg_operand" "r"))]
""
"<stcx> %2,%y1"
[(set_attr "type" "store_c")])
+(define_expand "store_conditionalti"
+ [(use (match_operand:CC 0 "cc_reg_operand" ""))
+ (use (match_operand:TI 1 "memory_operand" ""))
+ (use (match_operand:TI 2 "quad_int_reg_operand" ""))]
+ "TARGET_SYNC_TI"
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx pti_op1 = change_address (op1, PTImode, XEXP (op1, 0));
+ rtx pti_op2 = gen_reg_rtx (PTImode);
+
+ /* Use a temporary register to force getting an even register for the
+     lqarx/stqcx. instructions.  Normal optimizations will eliminate this
+ extra copy. */
+ emit_move_insn (pti_op2, gen_lowpart (PTImode, op2));
+ emit_insn (gen_store_conditionalpti (op0, pti_op1, pti_op2));
+ DONE;
+})
+
+(define_insn "store_conditionalpti"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x")
+ (unspec_volatile:CC [(const_int 0)] UNSPECV_SC))
+ (set (match_operand:PTI 1 "memory_operand" "=Z")
+ (match_operand:PTI 2 "quad_int_reg_operand" "r"))]
+ "TARGET_SYNC_TI && quad_int_reg_operand (operands[2], PTImode)"
+ "stqcx. %2,%y1"
+ [(set_attr "type" "store_c")])
+
(define_expand "atomic_compare_and_swap<mode>"
- [(match_operand:SI 0 "gpc_reg_operand" "") ;; bool out
- (match_operand:INT1 1 "gpc_reg_operand" "") ;; val out
- (match_operand:INT1 2 "memory_operand" "") ;; memory
- (match_operand:INT1 3 "reg_or_short_operand" "") ;; expected
- (match_operand:INT1 4 "gpc_reg_operand" "") ;; desired
+ [(match_operand:SI 0 "int_reg_operand" "") ;; bool out
+ (match_operand:AINT 1 "int_reg_operand" "") ;; val out
+ (match_operand:AINT 2 "memory_operand" "") ;; memory
+ (match_operand:AINT 3 "reg_or_short_operand" "") ;; expected
+ (match_operand:AINT 4 "int_reg_operand" "") ;; desired
(match_operand:SI 5 "const_int_operand" "") ;; is_weak
(match_operand:SI 6 "const_int_operand" "") ;; model succ
(match_operand:SI 7 "const_int_operand" "")] ;; model fail
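With TARGET_SYNC_TI these expanders accept __int128_t operands, so a 128-bit compare-and-swap should map onto an lqarx/stqcx. loop (the power8 testcase below checks exactly this). A usage sketch; quad_cas is illustrative and not part of the patch:

__int128_t
quad_cas (__int128_t *p, __int128_t expected, __int128_t desired)
{
  __atomic_compare_exchange_n (p, &expected, desired, 0 /* strong */,
			       __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  return expected;
}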
@@ -209,9 +283,9 @@ (define_expand "atomic_compare_and_swap<
})
(define_expand "atomic_exchange<mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (match_operand:INT1 2 "gpc_reg_operand" "") ;; input
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (match_operand:AINT 2 "int_reg_operand" "") ;; input
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
@@ -220,9 +294,9 @@ (define_expand "atomic_exchange<mode>"
})
(define_expand "atomic_<fetchop_name><mode>"
- [(match_operand:INT1 0 "memory_operand" "") ;; memory
- (FETCHOP:INT1 (match_dup 0)
- (match_operand:INT1 1 "<fetchop_pred>" "")) ;; operand
+ [(match_operand:AINT 0 "memory_operand" "") ;; memory
+ (FETCHOP:AINT (match_dup 0)
+ (match_operand:AINT 1 "<fetchop_pred>" "")) ;; operand
(match_operand:SI 2 "const_int_operand" "")] ;; model
""
{
@@ -232,8 +306,8 @@ (define_expand "atomic_<fetchop_name><mo
})
(define_expand "atomic_nand<mode>"
- [(match_operand:INT1 0 "memory_operand" "") ;; memory
- (match_operand:INT1 1 "gpc_reg_operand" "") ;; operand
+ [(match_operand:AINT 0 "memory_operand" "") ;; memory
+ (match_operand:AINT 1 "int_reg_operand" "") ;; operand
(match_operand:SI 2 "const_int_operand" "")] ;; model
""
{
@@ -243,10 +317,10 @@ (define_expand "atomic_nand<mode>"
})
(define_expand "atomic_fetch_<fetchop_name><mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (FETCHOP:INT1 (match_dup 1)
- (match_operand:INT1 2 "<fetchop_pred>" "")) ;; operand
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (FETCHOP:AINT (match_dup 1)
+ (match_operand:AINT 2 "<fetchop_pred>" "")) ;; operand
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
@@ -256,9 +330,9 @@ (define_expand "atomic_fetch_<fetchop_na
})
(define_expand "atomic_fetch_nand<mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (match_operand:INT1 2 "gpc_reg_operand" "") ;; operand
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (match_operand:AINT 2 "int_reg_operand" "") ;; operand
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
@@ -268,10 +342,10 @@ (define_expand "atomic_fetch_nand<mode>"
})
(define_expand "atomic_<fetchop_name>_fetch<mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (FETCHOP:INT1 (match_dup 1)
- (match_operand:INT1 2 "<fetchop_pred>" "")) ;; operand
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (FETCHOP:AINT (match_dup 1)
+ (match_operand:AINT 2 "<fetchop_pred>" "")) ;; operand
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
@@ -281,9 +355,9 @@ (define_expand "atomic_<fetchop_name>_fe
})
(define_expand "atomic_nand_fetch<mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (match_operand:INT1 2 "gpc_reg_operand" "") ;; operand
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (match_operand:AINT 2 "int_reg_operand" "") ;; operand
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
===================================================================
@@ -239,6 +239,12 @@ (define_mode_iterator INT1 [QI HI SI (DI
; extend modes for DImode
(define_mode_iterator QHSI [QI HI SI])
+; QImode or HImode for small atomic ops
+(define_mode_iterator QHI [QI HI])
+
+; HImode or SImode for sign extended fusion ops
+(define_mode_iterator HSI [HI SI])
+
; SImode or DImode, even if DImode doesn't fit in GPRs.
(define_mode_iterator SDI [SI DI])
===================================================================
@@ -0,0 +1,207 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-mcpu=power7 -O2" } */
+/* { dg-final { scan-assembler-not "lbarx" } } */
+/* { dg-final { scan-assembler-not "lharx" } } */
+/* { dg-final { scan-assembler-times "lwarx" 18 } } */
+/* { dg-final { scan-assembler-times "ldarx" 6 } } */
+/* { dg-final { scan-assembler-not "lqarx" } } */
+/* { dg-final { scan-assembler-not "stbcx" } } */
+/* { dg-final { scan-assembler-not "sthcx" } } */
+/* { dg-final { scan-assembler-times "stwcx" 18 } } */
+/* { dg-final { scan-assembler-times "stdcx" 6 } } */
+/* { dg-final { scan-assembler-not "stqcx" } } */
+/* { dg-final { scan-assembler-times "bl __atomic" 6 } } */
+/* { dg-final { scan-assembler-times "isync" 12 } } */
+/* { dg-final { scan-assembler-times "lwsync" 8 } } */
+/* { dg-final { scan-assembler-not "mtvsrd" } } */
+/* { dg-final { scan-assembler-not "mtvsrwa" } } */
+/* { dg-final { scan-assembler-not "mtvsrwz" } } */
+/* { dg-final { scan-assembler-not "mfvsrd" } } */
+/* { dg-final { scan-assembler-not "mfvsrwz" } } */
+
+/* Test for the byte atomic operations on power7 using lwarx/stwcx.  */
+char
+char_fetch_add_relaxed (char *ptr, int value)
+{
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
+}
+
+char
+char_fetch_sub_consume (char *ptr, int value)
+{
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
+}
+
+char
+char_fetch_and_acquire (char *ptr, int value)
+{
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+char
+char_fetch_ior_release (char *ptr, int value)
+{
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
+}
+
+char
+char_fetch_xor_acq_rel (char *ptr, int value)
+{
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
+}
+
+char
+char_fetch_nand_seq_cst (char *ptr, int value)
+{
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
+}
+
+/* Test for the half word atomic operations on power7 using lwarx/stwcx.  */
+short
+short_fetch_add_relaxed (short *ptr, int value)
+{
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
+}
+
+short
+short_fetch_sub_consume (short *ptr, int value)
+{
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
+}
+
+short
+short_fetch_and_acquire (short *ptr, int value)
+{
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+short
+short_fetch_ior_release (short *ptr, int value)
+{
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
+}
+
+short
+short_fetch_xor_acq_rel (short *ptr, int value)
+{
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
+}
+
+short
+short_fetch_nand_seq_cst (short *ptr, int value)
+{
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
+}
+
+/* Test for the word atomic operations on power7 using lwarx/stwcx.  */
+int
+int_fetch_add_relaxed (int *ptr, int value)
+{
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
+}
+
+int
+int_fetch_sub_consume (int *ptr, int value)
+{
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
+}
+
+int
+int_fetch_and_acquire (int *ptr, int value)
+{
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+int
+int_fetch_ior_release (int *ptr, int value)
+{
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
+}
+
+int
+int_fetch_xor_acq_rel (int *ptr, int value)
+{
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
+}
+
+int
+int_fetch_nand_seq_cst (int *ptr, int value)
+{
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
+}
+
+/* Test for the double word atomic operations on power7 using ldarx/stdcx.  */
+long
+long_fetch_add_relaxed (long *ptr, long value)
+{
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
+}
+
+long
+long_fetch_sub_consume (long *ptr, long value)
+{
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
+}
+
+long
+long_fetch_and_acquire (long *ptr, long value)
+{
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+long
+long_fetch_ior_release (long *ptr, long value)
+{
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
+}
+
+long
+long_fetch_xor_acq_rel (long *ptr, long value)
+{
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
+}
+
+long
+long_fetch_nand_seq_cst (long *ptr, long value)
+{
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
+}
+
+/* Test for the quad word atomic operations on power7, which call the
+   __atomic library functions.  */
+__int128_t
+quad_fetch_add_relaxed (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
+}
+
+__int128_t
+quad_fetch_sub_consume (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
+}
+
+__int128_t
+quad_fetch_and_acquire (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+__int128_t
+quad_fetch_ior_release (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
+}
+
+__int128_t
+quad_fetch_xor_acq_rel (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
+}
+
+__int128_t
+quad_fetch_nand_seq_cst (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
+}
===================================================================
@@ -0,0 +1,237 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mcpu=power8 -O2" } */
+/* { dg-final { scan-assembler-times "lbarx" 7 } } */
+/* { dg-final { scan-assembler-times "lharx" 7 } } */
+/* { dg-final { scan-assembler-times "lwarx" 7 } } */
+/* { dg-final { scan-assembler-times "ldarx" 7 } } */
+/* { dg-final { scan-assembler-times "lqarx" 7 } } */
+/* { dg-final { scan-assembler-times "stbcx" 7 } } */
+/* { dg-final { scan-assembler-times "sthcx" 7 } } */
+/* { dg-final { scan-assembler-times "stwcx" 7 } } */
+/* { dg-final { scan-assembler-times "stdcx" 7 } } */
+/* { dg-final { scan-assembler-times "stqcx" 7 } } */
+/* { dg-final { scan-assembler-not "bl __atomic" } } */
+/* { dg-final { scan-assembler-times "isync" 20 } } */
+/* { dg-final { scan-assembler-times "lwsync" 10 } } */
+/* { dg-final { scan-assembler-not "mtvsrd" } } */
+/* { dg-final { scan-assembler-not "mtvsrwa" } } */
+/* { dg-final { scan-assembler-not "mtvsrwz" } } */
+/* { dg-final { scan-assembler-not "mfvsrd" } } */
+/* { dg-final { scan-assembler-not "mfvsrwz" } } */
+
+/* Test for the byte atomic operations on power8 using lbarx/stbcx. */
+char
+char_fetch_add_relaxed (char *ptr, int value)
+{
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
+}
+
+char
+char_fetch_sub_consume (char *ptr, int value)
+{
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
+}
+
+char
+char_fetch_and_acquire (char *ptr, int value)
+{
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+char
+char_fetch_ior_release (char *ptr, int value)
+{
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
+}
+
+char
+char_fetch_xor_acq_rel (char *ptr, int value)
+{
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
+}
+
+char
+char_fetch_nand_seq_cst (char *ptr, int value)
+{
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
+}
+
+void
+char_val_compare_and_swap (char *p, int i, int j, char *q)
+{
+ *q = __sync_val_compare_and_swap (p, i, j);
+}
+
+/* Test for the half word atomic operations on power8 using lharx/sthcx. */
+short
+short_fetch_add_relaxed (short *ptr, int value)
+{
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
+}
+
+short
+short_fetch_sub_consume (short *ptr, int value)
+{
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
+}
+
+short
+short_fetch_and_acquire (short *ptr, int value)
+{
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+short
+short_fetch_ior_release (short *ptr, int value)
+{
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
+}
+
+short
+short_fetch_xor_acq_rel (short *ptr, int value)
+{
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
+}
+
+short
+short_fetch_nand_seq_cst (short *ptr, int value)
+{
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
+}
+
+void
+short_val_compare_and_swap (short *p, int i, int j, short *q)
+{
+ *q = __sync_val_compare_and_swap (p, i, j);
+}
+
+/* Test for the word atomic operations on power8 using lwarx/stwcx. */
+int
+int_fetch_add_relaxed (int *ptr, int value)
+{
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
+}
+
+int
+int_fetch_sub_consume (int *ptr, int value)
+{
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
+}
+
+int
+int_fetch_and_acquire (int *ptr, int value)
+{
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+int
+int_fetch_ior_release (int *ptr, int value)
+{
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
+}
+
+int
+int_fetch_xor_acq_rel (int *ptr, int value)
+{
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
+}
+
+int
+int_fetch_nand_seq_cst (int *ptr, int value)
+{
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
+}
+
+void
+int_val_compare_and_swap (int *p, int i, int j, int *q)
+{
+ *q = __sync_val_compare_and_swap (p, i, j);
+}
+
+/* Test for the double word atomic operations on power8 using ldarx/stdcx. */
+long
+long_fetch_add_relaxed (long *ptr, long value)
+{
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
+}
+
+long
+long_fetch_sub_consume (long *ptr, long value)
+{
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
+}
+
+long
+long_fetch_and_acquire (long *ptr, long value)
+{
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+long
+long_fetch_ior_release (long *ptr, long value)
+{
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
+}
+
+long
+long_fetch_xor_acq_rel (long *ptr, long value)
+{
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
+}
+
+long
+long_fetch_nand_seq_cst (long *ptr, long value)
+{
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
+}
+
+void
+long_val_compare_and_swap (long *p, long i, long j, long *q)
+{
+ *q = __sync_val_compare_and_swap (p, i, j);
+}
+
+/* Test for the quad word atomic operations on power8 using lqarx/stqcx.  */
+__int128_t
+quad_fetch_add_relaxed (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
+}
+
+__int128_t
+quad_fetch_sub_consume (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
+}
+
+__int128_t
+quad_fetch_and_acquire (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+__int128_t
+quad_fetch_ior_release (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
+}
+
+__int128_t
+quad_fetch_xor_acq_rel (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
+}
+
+__int128_t
+quad_fetch_nand_seq_cst (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
+}
+
+void
+quad_val_compare_and_swap (__int128_t *p, __int128_t i, __int128_t j, __int128_t *q)
+{
+ *q = __sync_val_compare_and_swap (p, i, j);
+}