===================================================================
@@ -270,7 +270,7 @@
{
HOST_WIDE_INT r;
- if (!TARGET_QUAD_MEMORY)
+ if (!TARGET_QUAD_MEMORY && !TARGET_QUAD_MEMORY_ATOMIC)
return 0;
if (GET_CODE (op) == SUBREG)
@@ -633,6 +633,7 @@
(match_test "offsettable_nonstrict_memref_p (op)")))
;; Return 1 if the operand is suitable for load/store quad memory.
+;; This predicate only checks for non-atomic loads/stores.
(define_predicate "quad_memory_operand"
(match_code "mem")
{
===================================================================
@@ -337,6 +337,10 @@ rs6000_target_modify_macros (bool define
rs6000_define_or_undefine_macro (define_p, "__HTM__");
if ((flags & OPTION_MASK_P8_VECTOR) != 0)
rs6000_define_or_undefine_macro (define_p, "__POWER8_VECTOR__");
+ if ((flags & OPTION_MASK_QUAD_MEMORY) != 0)
+ rs6000_define_or_undefine_macro (define_p, "__QUAD_MEMORY__");
+ if ((flags & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
+ rs6000_define_or_undefine_macro (define_p, "__QUAD_MEMORY_ATOMIC__");
if ((flags & OPTION_MASK_CRYPTO) != 0)
rs6000_define_or_undefine_macro (define_p, "__CRYPTO__");
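
With the two macros above, user code can detect the quad word support at
preprocessing time. A minimal sketch (the macro names come from this hunk;
the typedef fallback is purely illustrative):

    /* Prefer a 128-bit counter when the compiler advertises the
       quad word atomic instructions (lqarx/stqcx.).  */
    #ifdef __QUAD_MEMORY_ATOMIC__
    typedef __int128_t counter_t;
    #else
    typedef long long counter_t;   /* fall back to 64-bit atomics.  */
    #endif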
===================================================================
@@ -53,7 +53,8 @@
| OPTION_MASK_CRYPTO \
| OPTION_MASK_DIRECT_MOVE \
| OPTION_MASK_HTM \
- | OPTION_MASK_QUAD_MEMORY)
+ | OPTION_MASK_QUAD_MEMORY \
+ | OPTION_MASK_QUAD_MEMORY_ATOMIC)
#define POWERPC_7400_MASK (OPTION_MASK_PPC_GFXOPT | OPTION_MASK_ALTIVEC)
===================================================================
@@ -3317,14 +3317,37 @@ rs6000_option_override_internal (bool gl
-  /* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
-     silently turn off quad memory mode.  */
-  if (TARGET_QUAD_MEMORY && !TARGET_POWERPC64)
+  /* The quad memory instructions only work in 64-bit mode.  In 32-bit mode,
+     silently turn off quad memory mode.  */
+  if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
{
if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
warning (0, N_("-mquad-memory requires 64-bit mode"));
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
+ warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
+
+ rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
+ | OPTION_MASK_QUAD_MEMORY_ATOMIC);
+ }
+
+  /* The non-atomic quad memory loads/stores are disabled for little endian,
+     since the words are reversed, but atomic operations can still be done
+     by swapping the words.  */
+ if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
+ {
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
+ warning (0, N_("-mquad-memory is not available in little endian mode"));
+
rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
}
+  /* Assume that if the user asked for normal quad memory instructions, they
+     want the atomic versions as well, unless they explicitly told us not to
+     use quad word atomic instructions.  */
+ if (TARGET_QUAD_MEMORY
+ && !TARGET_QUAD_MEMORY_ATOMIC
+ && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
+ rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
+
/* Enable power8 fusion if we are tuning for power8, even if we aren't
generating power8 instructions. */
if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
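
The net effect of the blocks above: in 64-bit big endian mode, -mquad-memory
implies -mquad-memory-atomic unless the atomic form was explicitly disabled,
while 32-bit and little endian targets silently lose the non-atomic form. A
quick sanity check of the implication (a sketch relying on the
__QUAD_MEMORY__ and __QUAD_MEMORY_ATOMIC__ macros added in the
rs6000_target_modify_macros hunk above; the command line is only a
suggestion):

    /* Compile with: gcc -m64 -mcpu=power8 -mquad-memory -S check.c  */
    #if defined(__QUAD_MEMORY__) && !defined(__QUAD_MEMORY_ATOMIC__)
    #error "-mquad-memory should have implied -mquad-memory-atomic"
    #endif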
@@ -5875,7 +5898,8 @@ direct_move_p (rtx op0, rtx op1)
return false;
}
-/* Return true if this is a load or store quad operation. */
+/* Return true if this is a load or store quad operation. This function does
+ not handle the atomic quad memory instructions. */
bool
quad_load_store_p (rtx op0, rtx op1)
@@ -30675,6 +30699,7 @@ static struct rs6000_opt_mask const rs60
{ "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
{ "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
{ "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
+ { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
{ "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
{ "string", OPTION_MASK_STRING, false, true },
{ "update", OPTION_MASK_NO_UPDATE, true , true },
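
Because the new mask is listed in the rs6000_opt_mask table, the feature
should also be controllable per function through the target attribute and
#pragma GCC target. A hedged sketch (the function name and body are
illustrative; assumes a 64-bit power8 target):

    /* Enable the quad word atomic instructions for just this function.  */
    __attribute__((target ("quad-memory-atomic")))
    __int128_t
    fetch_add_128 (__int128_t *p, __int128_t v)
    {
      return __atomic_fetch_add (p, v, __ATOMIC_SEQ_CST);
    }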
===================================================================
@@ -524,8 +524,11 @@ extern int rs6000_vector_align[];
/* Byte/char syncs were added as phased in for ISA 2.06B, but are not present
in power7, so conditionalize them on p8 features. TImode syncs need quad
memory support. */
-#define TARGET_SYNC_HI_QI (TARGET_QUAD_MEMORY || TARGET_DIRECT_MOVE)
-#define TARGET_SYNC_TI TARGET_QUAD_MEMORY
+#define TARGET_SYNC_HI_QI (TARGET_QUAD_MEMORY \
+ || TARGET_QUAD_MEMORY_ATOMIC \
+ || TARGET_DIRECT_MOVE)
+
+#define TARGET_SYNC_TI TARGET_QUAD_MEMORY_ATOMIC
/* Power7 has both 32-bit load and store integer for the FPRs, so we don't need
to allocate the SDmode stack slot to get the value into the proper location
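
TARGET_SYNC_TI gates whether the 16-byte __atomic built-ins expand inline to
lqarx/stqcx. sequences instead of falling back to library calls. A minimal
sketch of code that depends on it (assumes a 64-bit power8 target with quad
word atomic support; the function name is illustrative):

    #include <stdbool.h>

    /* 128-bit compare-and-swap; expands to a lqarx/stqcx. loop when
       TARGET_SYNC_TI holds.  */
    bool
    cas_128 (__int128_t *p, __int128_t *expected, __int128_t desired)
    {
      return __atomic_compare_exchange_n (p, expected, desired, false,
                                          __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    }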
===================================================================
@@ -556,7 +556,11 @@ Use ISA 2.07 transactional memory (HTM)
mquad-memory
Target Report Mask(QUAD_MEMORY) Var(rs6000_isa_flags)
-Generate the quad word memory instructions (lq/stq/lqarx/stqcx).
+Generate the quad word memory instructions (lq/stq).
+
+mquad-memory-atomic
+Target Report Mask(QUAD_MEMORY_ATOMIC) Var(rs6000_isa_flags)
+Generate the quad word memory atomic instructions (lqarx/stqcx).
mcompat-align-parm
Target Report Var(rs6000_compat_align_parm) Init(1) Save
===================================================================
@@ -204,25 +204,46 @@
"<QHI:larx> %0,%y1"
[(set_attr "type" "load_l")])
-;; Use PTImode to get even/odd register pairs
+;; Use PTImode to get even/odd register pairs.
+;; Use a temporary register to force getting an even register for the
+;; lqarx/stqcx. instructions.  Normal optimizations will eliminate this extra
+;; copy on big endian systems.
+
+;; On little endian systems, where the non-atomic quad word load/store
+;; instructions are not used, the address can be register+offset, so make
+;; sure the address is indexed or indirect before register allocation.
+
(define_expand "load_lockedti"
[(use (match_operand:TI 0 "quad_int_reg_operand" ""))
(use (match_operand:TI 1 "memory_operand" ""))]
"TARGET_SYNC_TI"
{
- /* Use a temporary register to force getting an even register for the
- lqarx/stqcrx. instructions. Normal optimizations will eliminate this
- extra copy. */
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
rtx pti = gen_reg_rtx (PTImode);
- emit_insn (gen_load_lockedpti (pti, operands[1]));
- emit_move_insn (operands[0], gen_lowpart (TImode, pti));
+
+ if (!indexed_or_indirect_operand (op1, TImode))
+ {
+ rtx old_addr = XEXP (op1, 0);
+ rtx new_addr = force_reg (Pmode, old_addr);
+ operands[1] = op1 = change_address (op1, TImode, new_addr);
+ }
+
+ emit_insn (gen_load_lockedpti (pti, op1));
+ if (WORDS_BIG_ENDIAN)
+ emit_move_insn (op0, gen_lowpart (TImode, pti));
+ else
+ {
+ emit_move_insn (gen_lowpart (DImode, op0), gen_highpart (DImode, pti));
+ emit_move_insn (gen_highpart (DImode, op0), gen_lowpart (DImode, pti));
+ }
DONE;
})
(define_insn "load_lockedpti"
[(set (match_operand:PTI 0 "quad_int_reg_operand" "=&r")
(unspec_volatile:PTI
- [(match_operand:TI 1 "memory_operand" "Z")] UNSPECV_LL))]
+ [(match_operand:TI 1 "indexed_or_indirect_operand" "Z")] UNSPECV_LL))]
"TARGET_SYNC_TI
&& !reg_mentioned_p (operands[0], operands[1])
&& quad_int_reg_operand (operands[0], PTImode)"
@@ -238,6 +259,14 @@
"<stcx> %2,%y1"
[(set_attr "type" "store_c")])
+;; Use a temporary register to force getting an even register for the
+;; lqarx/stqcx. instructions.  Normal optimizations will eliminate this extra
+;; copy on big endian systems.
+
+;; On little endian systems, where the non-atomic quad word load/store
+;; instructions are not used, the address can be register+offset, so make
+;; sure the address is indexed or indirect before register allocation.
+
(define_expand "store_conditionalti"
[(use (match_operand:CC 0 "cc_reg_operand" ""))
(use (match_operand:TI 1 "memory_operand" ""))
@@ -247,21 +276,36 @@
rtx op0 = operands[0];
rtx op1 = operands[1];
rtx op2 = operands[2];
- rtx pti_op1 = change_address (op1, PTImode, XEXP (op1, 0));
- rtx pti_op2 = gen_reg_rtx (PTImode);
+ rtx addr = XEXP (op1, 0);
+ rtx pti_mem;
+ rtx pti_reg;
+
+ if (!indexed_or_indirect_operand (op1, TImode))
+ {
+ rtx new_addr = force_reg (Pmode, addr);
+ operands[1] = op1 = change_address (op1, TImode, new_addr);
+ addr = new_addr;
+ }
+
+ pti_mem = change_address (op1, PTImode, addr);
+ pti_reg = gen_reg_rtx (PTImode);
+
+ if (WORDS_BIG_ENDIAN)
+ emit_move_insn (pti_reg, gen_lowpart (PTImode, op2));
+ else
+ {
+ emit_move_insn (gen_lowpart (DImode, pti_reg), gen_highpart (DImode, op2));
+ emit_move_insn (gen_highpart (DImode, pti_reg), gen_lowpart (DImode, op2));
+ }
- /* Use a temporary register to force getting an even register for the
- lqarx/stqcrx. instructions. Normal optimizations will eliminate this
- extra copy. */
- emit_move_insn (pti_op2, gen_lowpart (PTImode, op2));
- emit_insn (gen_store_conditionalpti (op0, pti_op1, pti_op2));
+ emit_insn (gen_store_conditionalpti (op0, pti_mem, pti_reg));
DONE;
})
(define_insn "store_conditionalpti"
[(set (match_operand:CC 0 "cc_reg_operand" "=x")
(unspec_volatile:CC [(const_int 0)] UNSPECV_SC))
- (set (match_operand:PTI 1 "memory_operand" "=Z")
+ (set (match_operand:PTI 1 "indexed_or_indirect_operand" "=Z")
(match_operand:PTI 2 "quad_int_reg_operand" "r"))]
"TARGET_SYNC_TI && quad_int_reg_operand (operands[2], PTImode)"
"stqcx. %2,%y1"
===================================================================
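The load_lockedti and store_conditionalti expanders above form the TImode
load-locked/store-conditional pair used by the generic atomic expansion, so
any 16-byte atomic read-modify-write exercises them, including the word swap
on little endian. A small usage sketch (assumes a 64-bit power8 target; the
function name is illustrative):

    /* 128-bit atomic exchange; expands to a load_lockedti /
       store_conditionalti retry loop.  */
    __int128_t
    xchg_128 (__int128_t *p, __int128_t v)
    {
      return __atomic_exchange_n (p, v, __ATOMIC_ACQ_REL);
    }
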
@@ -858,7 +858,9 @@ See RS/6000 and PowerPC Options.
-msave-toc-indirect -mno-save-toc-indirect @gol
-mpower8-fusion -mno-mpower8-fusion -mpower8-vector -mno-power8-vector @gol
-mcrypto -mno-crypto -mdirect-move -mno-direct-move @gol
--mquad-memory -mno-quad-memory}
+-mquad-memory -mno-quad-memory @gol
+-mquad-memory-atomic -mno-quad-memory-atomic @gol
+-mcompat-align-parm -mno-compat-align-parm}
@emph{RX Options}
@gccoptlist{-m64bit-doubles -m32bit-doubles -fpu -nofpu@gol
@@ -17243,8 +17245,8 @@ following options:
-mpopcntb -mpopcntd -mpowerpc64 @gol
-mpowerpc-gpopt -mpowerpc-gfxopt -msingle-float -mdouble-float @gol
-msimple-fpu -mstring -mmulhw -mdlmzb -mmfpgpr -mvsx @gol
--mcrypto -mdirect-move -mpower8-fusion -mpower8-vector -mquad-memory @gol
--mcompat-align-parm -mno-compat-align-parm}
+-mcrypto -mdirect-move -mpower8-fusion -mpower8-vector @gol
+-mquad-memory -mquad-memory-atomic}
The particular options set for any particular CPU varies between
compiler versions, depending on what setting seems to produce optimal
@@ -17399,10 +17401,18 @@ the vector instructions.
@itemx -mno-quad-memory
@opindex mquad-memory
@opindex mno-quad-memory
-Generate code that uses (does not use) the quad word memory
+Generate code that uses (does not use) the non-atomic quad word memory
instructions. The @option{-mquad-memory} option requires use of
64-bit mode.
+@item -mquad-memory-atomic
+@itemx -mno-quad-memory-atomic
+@opindex mquad-memory-atomic
+@opindex mno-quad-memory-atomic
+Generate code that uses (does not use) the atomic quad word memory
+instructions. The @option{-mquad-memory-atomic} option requires use of
+64-bit mode.
+
@item -mfloat-gprs=@var{yes/single/double/no}
@itemx -mfloat-gprs
@opindex mfloat-gprs
===================================================================
@@ -0,0 +1,67 @@
+/* { dg-do run { target { powerpc*-*-linux* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
+/* { dg-require-effective-target p8vector_hw } */
+/* { dg-options "-mcpu=power8 -O2" } */
+
+/* Test whether we get the right bits for quad word atomic instructions. */
+#include <stdlib.h>
+
+static __int128_t quad_fetch_and (__int128_t *, __int128_t value) __attribute__((__noinline__));
+static __int128_t quad_fetch_or (__int128_t *, __int128_t value) __attribute__((__noinline__));
+static __int128_t quad_fetch_add (__int128_t *, __int128_t value) __attribute__((__noinline__));
+
+static __int128_t
+quad_fetch_and (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+static __int128_t
+quad_fetch_or (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_or (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+static __int128_t
+quad_fetch_add (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_add (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+int
+main (void)
+{
+ __int128_t result;
+ __int128_t value;
+ __int128_t and_input = ((((__int128_t) 0x1234567890abcdefULL) << 64) | ((__int128_t) 0xfedcba0987654321ULL));
+ __int128_t and_value = ((((__int128_t) 0xfffffffffffffff0ULL) << 64) | ((__int128_t) 0xfffffffffffffff0ULL));
+ __int128_t and_exp = ((((__int128_t) 0x1234567890abcde0ULL) << 64) | ((__int128_t) 0xfedcba0987654320ULL));
+
+ __int128_t or_input = ((((__int128_t) 0x1234567890abcdefULL) << 64) | ((__int128_t) 0xfedcba0987654321ULL));
+ __int128_t or_value = ((((__int128_t) 0x0000000000000010ULL) << 64) | ((__int128_t) 0x000000000000000eULL));
+ __int128_t or_exp = ((((__int128_t) 0x1234567890abcdffULL) << 64) | ((__int128_t) 0xfedcba098765432fULL));
+
+ __int128_t add_input = ((((__int128_t) 0x1234567890abcdefULL) << 64) | ((__int128_t) 0xfedcba0987654321ULL));
+ __int128_t add_value = ((((__int128_t) 0x0000000001000000ULL) << 64) | ((__int128_t) 0x0000001000000000ULL));
+ __int128_t add_exp = ((((__int128_t) 0x1234567891abcdefULL) << 64) | ((__int128_t) 0xfedcba1987654321ULL));
+
+
+ value = and_input;
+ result = quad_fetch_and (&value, and_value);
+ if (result != and_input || value != and_exp)
+ abort ();
+
+ value = or_input;
+ result = quad_fetch_or (&value, or_value);
+ if (result != or_input || value != or_exp)
+ abort ();
+
+ value = add_input;
+ result = quad_fetch_add (&value, add_value);
+ if (result != add_input || value != add_exp)
+ abort ();
+
+ return 0;
+}
+