===================================================================
@@ -17748,7 +17748,8 @@ emit_unlikely_jump (rtx cond, rtx label)
}
/* A subroutine of the atomic operation splitters. Emit a load-locked
- instruction in MODE. */
+   instruction in MODE.  For QI/HImode, possibly use a pattern that includes
+ the zero_extend operation. */
static void
emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
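On power8, the QI/HImode load-locked patterns can zero-extend directly into an SImode register, so the compare loop can work on a full word with no separate extend. A minimal C analogue of that semantics, under an illustrative name (the real pattern also establishes the reservation, which plain C cannot express):

/* Hedged sketch: what load_lockedqi_si computes, minus the reservation.
   load_locked_byte_zext is illustrative, not a GCC internal.  */
static inline unsigned int
load_locked_byte_zext (const unsigned char *mem)
{
  return (unsigned int) *mem;	/* lbarx loads the byte and zero-extends */
}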
@@ -17757,12 +17758,26 @@ emit_load_locked (enum machine_mode mode
switch (mode)
{
+ case QImode:
+ fn = gen_load_lockedqi;
+ break;
+ case HImode:
+ fn = gen_load_lockedhi;
+ break;
case SImode:
- fn = gen_load_lockedsi;
+ if (GET_MODE (mem) == QImode)
+ fn = gen_load_lockedqi_si;
+ else if (GET_MODE (mem) == HImode)
+ fn = gen_load_lockedhi_si;
+ else
+ fn = gen_load_lockedsi;
break;
case DImode:
fn = gen_load_lockeddi;
break;
+ case TImode:
+ fn = gen_load_lockedti;
+ break;
default:
gcc_unreachable ();
}
@@ -17779,12 +17794,21 @@ emit_store_conditional (enum machine_mod
switch (mode)
{
+ case QImode:
+ fn = gen_store_conditionalqi;
+ break;
+ case HImode:
+ fn = gen_store_conditionalhi;
+ break;
case SImode:
fn = gen_store_conditionalsi;
break;
case DImode:
fn = gen_store_conditionaldi;
break;
+ case TImode:
+ fn = gen_store_conditionalti;
+ break;
default:
gcc_unreachable ();
}
@@ -17931,7 +17955,7 @@ rs6000_expand_atomic_compare_and_swap (r
{
rtx boolval, retval, mem, oldval, newval, cond;
rtx label1, label2, x, mask, shift;
- enum machine_mode mode;
+ enum machine_mode mode, orig_mode;
enum memmodel mod_s, mod_f;
bool is_weak;
@@ -17943,22 +17967,29 @@ rs6000_expand_atomic_compare_and_swap (r
is_weak = (INTVAL (operands[5]) != 0);
mod_s = (enum memmodel) INTVAL (operands[6]);
mod_f = (enum memmodel) INTVAL (operands[7]);
- mode = GET_MODE (mem);
+ orig_mode = mode = GET_MODE (mem);
mask = shift = NULL_RTX;
if (mode == QImode || mode == HImode)
{
- mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
-
- /* Shift and mask OLDVAL into position with the word. */
+      /* Before power8, we didn't have access to lbarx/lharx, so generate a
+	 lwarx and use shift/mask operations.  With power8, we need to do the
+ comparison in SImode, but the store is still done in QI/HImode. */
oldval = convert_modes (SImode, mode, oldval, 1);
- oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
- /* Shift and mask NEWVAL into position within the word. */
- newval = convert_modes (SImode, mode, newval, 1);
- newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ if (!TARGET_SYNC_HI_QI)
+ {
+ mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
+
+	  /* Shift and mask OLDVAL into position within the word.  */
+ oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
+
+ /* Shift and mask NEWVAL into position within the word. */
+ newval = convert_modes (SImode, mode, newval, 1);
+ newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ }
/* Prepare to adjust the return value. */
retval = gen_reg_rtx (SImode);
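In the pre-power8 path, OLDVAL and NEWVAL are positioned inside the containing aligned word before the loop runs. A hedged C analogue of the insertion the shift/mask pair performs, assuming MASK is already shifted into position as rs6000_adjust_atomic_subword produces it:

/* Sketch only: combine a shifted subword value into the surrounding word.  */
static unsigned int
insert_subword (unsigned int word, unsigned int val_shifted, unsigned int mask)
{
  return (word & ~mask) | (val_shifted & mask);
}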
@@ -17987,7 +18018,25 @@ rs6000_expand_atomic_compare_and_swap (r
}
cond = gen_reg_rtx (CCmode);
- x = gen_rtx_COMPARE (CCmode, x, oldval);
+ /* If we have TImode, synthesize a comparison. */
+ if (mode != TImode)
+ x = gen_rtx_COMPARE (CCmode, x, oldval);
+ else
+ {
+ rtx xor1_result = gen_reg_rtx (DImode);
+ rtx xor2_result = gen_reg_rtx (DImode);
+ rtx or_result = gen_reg_rtx (DImode);
+ rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
+ rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
+ rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
+ rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
+
+ emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
+ emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
+ emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
+ x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
+ }
+
emit_insn (gen_rtx_SET (VOIDmode, cond, x));
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
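The synthesized TImode equality test reduces to two 64-bit XORs ORed together and compared against zero; in plain C the same predicate is:

/* C analogue of the synthesized 128-bit comparison (illustrative only).  */
static int
ti_equal (unsigned long long new0, unsigned long long new1,
	  unsigned long long old0, unsigned long long old1)
{
  return ((new0 ^ old0) | (new1 ^ old1)) == 0;
}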
@@ -17997,7 +18046,7 @@ rs6000_expand_atomic_compare_and_swap (r
if (mask)
x = rs6000_mask_atomic_subword (retval, newval, mask);
- emit_store_conditional (mode, cond, mem, x);
+ emit_store_conditional (orig_mode, cond, mem, x);
if (!is_weak)
{
@@ -18015,6 +18064,8 @@ rs6000_expand_atomic_compare_and_swap (r
if (shift)
rs6000_finish_atomic_subword (operands[1], retval, shift);
+ else if (mode != GET_MODE (operands[1]))
+ convert_move (operands[1], retval, 1);
/* In all cases, CR0 contains EQ on success, and NE on failure. */
x = gen_rtx_EQ (SImode, cond, const0_rtx);
@@ -18038,7 +18089,7 @@ rs6000_expand_atomic_exchange (rtx opera
mode = GET_MODE (mem);
mask = shift = NULL_RTX;
- if (mode == QImode || mode == HImode)
+ if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
{
mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
@@ -18087,53 +18138,70 @@ rs6000_expand_atomic_op (enum rtx_code c
{
enum memmodel model = (enum memmodel) INTVAL (model_rtx);
enum machine_mode mode = GET_MODE (mem);
+ enum machine_mode store_mode = mode;
rtx label, x, cond, mask, shift;
rtx before = orig_before, after = orig_after;
mask = shift = NULL_RTX;
+  /* On power8, we want to use SImode for the operation.  On previous systems,
+     do the operation on the containing word and use shift/mask to get the
+     proper byte or halfword.  */
if (mode == QImode || mode == HImode)
{
- mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
-
- /* Shift and mask VAL into position with the word. */
- val = convert_modes (SImode, mode, val, 1);
- val = expand_simple_binop (SImode, ASHIFT, val, shift,
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ if (TARGET_SYNC_HI_QI)
+ {
+ val = convert_modes (SImode, mode, val, 1);
- switch (code)
+ /* Prepare to adjust the return value. */
+ before = gen_reg_rtx (SImode);
+ if (after)
+ after = gen_reg_rtx (SImode);
+ mode = SImode;
+ }
+ else
{
- case IOR:
- case XOR:
- /* We've already zero-extended VAL. That is sufficient to
- make certain that it does not affect other bits. */
- mask = NULL;
- break;
+ mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
- case AND:
- /* If we make certain that all of the other bits in VAL are
- set, that will be sufficient to not affect other bits. */
- x = gen_rtx_NOT (SImode, mask);
- x = gen_rtx_IOR (SImode, x, val);
- emit_insn (gen_rtx_SET (VOIDmode, val, x));
- mask = NULL;
- break;
+	  /* Shift and mask VAL into position within the word.  */
+ val = convert_modes (SImode, mode, val, 1);
+ val = expand_simple_binop (SImode, ASHIFT, val, shift,
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
- case NOT:
- case PLUS:
- case MINUS:
- /* These will all affect bits outside the field and need
- adjustment via MASK within the loop. */
- break;
+ switch (code)
+ {
+ case IOR:
+ case XOR:
+ /* We've already zero-extended VAL. That is sufficient to
+ make certain that it does not affect other bits. */
+ mask = NULL;
+ break;
- default:
- gcc_unreachable ();
- }
+ case AND:
+ /* If we make certain that all of the other bits in VAL are
+ set, that will be sufficient to not affect other bits. */
+ x = gen_rtx_NOT (SImode, mask);
+ x = gen_rtx_IOR (SImode, x, val);
+ emit_insn (gen_rtx_SET (VOIDmode, val, x));
+ mask = NULL;
+ break;
- /* Prepare to adjust the return value. */
- before = gen_reg_rtx (SImode);
- if (after)
- after = gen_reg_rtx (SImode);
- mode = SImode;
+ case NOT:
+ case PLUS:
+ case MINUS:
+ /* These will all affect bits outside the field and need
+ adjustment via MASK within the loop. */
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Prepare to adjust the return value. */
+ before = gen_reg_rtx (SImode);
+ if (after)
+ after = gen_reg_rtx (SImode);
+ store_mode = mode = SImode;
+ }
}
mem = rs6000_pre_atomic_barrier (mem, model);
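The AND adjustment in the pre-power8 branch works by forcing every bit of VAL outside the field to 1, so the atomic AND cannot disturb the neighbouring subwords. A hedged one-line analogue:

/* Sketch of the AND preparation: bits outside MASK become 1, so
   'word & prepared' leaves the other subwords of the word intact.  */
static unsigned int
prepare_and_operand (unsigned int val_shifted, unsigned int mask)
{
  return val_shifted | ~mask;
}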
@@ -18166,9 +18234,11 @@ rs6000_expand_atomic_op (enum rtx_code c
NULL_RTX, 1, OPTAB_LIB_WIDEN);
x = rs6000_mask_atomic_subword (before, x, mask);
}
+ else if (store_mode != mode)
+ x = convert_modes (store_mode, mode, x, 1);
cond = gen_reg_rtx (CCmode);
- emit_store_conditional (mode, cond, mem, x);
+ emit_store_conditional (store_mode, cond, mem, x);
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
emit_unlikely_jump (x, label);
@@ -18177,11 +18247,22 @@ rs6000_expand_atomic_op (enum rtx_code c
if (shift)
{
+ /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
+	 then do the calculations in a SImode register.  */
if (orig_before)
rs6000_finish_atomic_subword (orig_before, before, shift);
if (orig_after)
rs6000_finish_atomic_subword (orig_after, after, shift);
}
+ else if (store_mode != mode)
+ {
+ /* QImode/HImode on machines with lbarx/lharx where we do the native
+	 operation and then do the calculations in a SImode register.  */
+ if (orig_before)
+ convert_move (orig_before, before, 1);
+ if (orig_after)
+ convert_move (orig_after, after, 1);
+ }
else if (orig_after && after != orig_after)
emit_move_insn (orig_after, after);
}
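After the loop, rs6000_finish_atomic_subword recovers the byte or halfword from its position within the word; roughly, and assuming the shift counts bits from the low end as set up earlier:

/* Hedged analogue of extracting the subword result after the loop.  */
static unsigned char
finish_subword_byte (unsigned int word, unsigned int shift)
{
  return (unsigned char) (word >> shift);
}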
===================================================================
@@ -18,14 +18,23 @@
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
-(define_mode_attr larx [(SI "lwarx") (DI "ldarx")])
-(define_mode_attr stcx [(SI "stwcx.") (DI "stdcx.")])
+(define_mode_attr larx [(QI "lbarx")
+ (HI "lharx")
+ (SI "lwarx")
+ (DI "ldarx")
+ (TI "lqarx")])
+
+(define_mode_attr stcx [(QI "stbcx.")
+ (HI "sthcx.")
+ (SI "stwcx.")
+ (DI "stdcx.")
+ (TI "stqcx.")])
(define_code_iterator FETCHOP [plus minus ior xor and])
(define_code_attr fetchop_name
[(plus "add") (minus "sub") (ior "or") (xor "xor") (and "and")])
(define_code_attr fetchop_pred
- [(plus "add_operand") (minus "gpc_reg_operand")
+ [(plus "add_operand") (minus "int_reg_operand")
(ior "logical_operand") (xor "logical_operand") (and "and_operand")])
(define_expand "mem_thread_fence"
@@ -129,16 +138,7 @@ (define_expand "atomic_load<mode>"
case MEMMODEL_CONSUME:
case MEMMODEL_ACQUIRE:
case MEMMODEL_SEQ_CST:
- if (GET_MODE (operands[0]) == QImode)
- emit_insn (gen_loadsync_qi (operands[0]));
- else if (GET_MODE (operands[0]) == HImode)
- emit_insn (gen_loadsync_hi (operands[0]));
- else if (GET_MODE (operands[0]) == SImode)
- emit_insn (gen_loadsync_si (operands[0]));
- else if (GET_MODE (operands[0]) == DImode)
- emit_insn (gen_loadsync_di (operands[0]));
- else
- gcc_unreachable ();
+ emit_insn (gen_loadsync_<mode> (operands[0]));
break;
default:
gcc_unreachable ();
@@ -170,35 +170,109 @@ (define_expand "atomic_store<mode>"
DONE;
})
-;; ??? Power ISA 2.06B says that there *is* a load-{byte,half}-and-reserve
-;; opcode that is "phased-in". Not implemented as of Power7, so not yet used,
-;; but let's prepare the macros anyway.
-
-(define_mode_iterator ATOMIC [SI (DI "TARGET_POWERPC64")])
+;; Any supported integer mode that has atomic l<x>arx/st<x>cx. instructions
+;; other than the quad memory operations, which have special restrictions.
+;; Byte/halfword atomic instructions were added in ISA 2.06B, but were phased
+;; in and did not show up until power8. TImode atomic lqarx/stqcx. require
+;; special handling due to even/odd register requirements.
+(define_mode_iterator ATOMIC [(QI "TARGET_SYNC_HI_QI")
+ (HI "TARGET_SYNC_HI_QI")
+ SI
+ (DI "TARGET_POWERPC64")])
+
+;; Types that we should provide atomic instructions for.
+
+(define_mode_iterator AINT [QI
+ HI
+ SI
+ (DI "TARGET_POWERPC64")
+ (TI "TARGET_SYNC_TI")])
(define_insn "load_locked<mode>"
- [(set (match_operand:ATOMIC 0 "gpc_reg_operand" "=r")
+ [(set (match_operand:ATOMIC 0 "int_reg_operand" "=r")
(unspec_volatile:ATOMIC
[(match_operand:ATOMIC 1 "memory_operand" "Z")] UNSPECV_LL))]
""
"<larx> %0,%y1"
[(set_attr "type" "load_l")])
+(define_insn "load_locked<QHI:mode>_si"
+ [(set (match_operand:SI 0 "int_reg_operand" "=r")
+ (unspec_volatile:SI
+ [(match_operand:QHI 1 "memory_operand" "Z")] UNSPECV_LL))]
+ "TARGET_SYNC_HI_QI"
+ "<QHI:larx> %0,%y1"
+ [(set_attr "type" "load_l")])
+
+;; Use PTImode to get even/odd register pairs
+(define_expand "load_lockedti"
+ [(use (match_operand:TI 0 "quad_int_reg_operand" ""))
+ (use (match_operand:TI 1 "memory_operand" ""))]
+ "TARGET_SYNC_TI"
+{
+ /* Use a temporary register to force getting an even register for the
+ lqarx/stqcrx. instructions. Normal optimizations will eliminate this
+     lqarx/stqcx. instructions.  Normal optimizations will eliminate this
+ rtx pti = gen_reg_rtx (PTImode);
+ emit_insn (gen_load_lockedpti (pti, operands[1]));
+ emit_move_insn (operands[0], gen_lowpart (TImode, pti));
+ DONE;
+})
+
+(define_insn "load_lockedpti"
+ [(set (match_operand:PTI 0 "quad_int_reg_operand" "=&r")
+ (unspec_volatile:PTI
+ [(match_operand:TI 1 "memory_operand" "Z")] UNSPECV_LL))]
+ "TARGET_SYNC_TI
+ && !reg_mentioned_p (operands[0], operands[1])
+ && quad_int_reg_operand (operands[0], PTImode)"
+ "lqarx %0,%y1"
+ [(set_attr "type" "load_l")])
+
(define_insn "store_conditional<mode>"
[(set (match_operand:CC 0 "cc_reg_operand" "=x")
(unspec_volatile:CC [(const_int 0)] UNSPECV_SC))
(set (match_operand:ATOMIC 1 "memory_operand" "=Z")
- (match_operand:ATOMIC 2 "gpc_reg_operand" "r"))]
+ (match_operand:ATOMIC 2 "int_reg_operand" "r"))]
""
"<stcx> %2,%y1"
[(set_attr "type" "store_c")])
+(define_expand "store_conditionalti"
+ [(use (match_operand:CC 0 "cc_reg_operand" ""))
+ (use (match_operand:TI 1 "memory_operand" ""))
+ (use (match_operand:TI 2 "quad_int_reg_operand" ""))]
+ "TARGET_SYNC_TI"
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx pti_op1 = change_address (op1, PTImode, XEXP (op1, 0));
+ rtx pti_op2 = gen_reg_rtx (PTImode);
+
+ /* Use a temporary register to force getting an even register for the
+     lqarx/stqcx. instructions.  Normal optimizations will eliminate this
+ extra copy. */
+ emit_move_insn (pti_op2, gen_lowpart (PTImode, op2));
+ emit_insn (gen_store_conditionalpti (op0, pti_op1, pti_op2));
+ DONE;
+})
+
+(define_insn "store_conditionalpti"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x")
+ (unspec_volatile:CC [(const_int 0)] UNSPECV_SC))
+ (set (match_operand:PTI 1 "memory_operand" "=Z")
+ (match_operand:PTI 2 "quad_int_reg_operand" "r"))]
+ "TARGET_SYNC_TI && quad_int_reg_operand (operands[2], PTImode)"
+ "stqcx. %2,%y1"
+ [(set_attr "type" "store_c")])
+
(define_expand "atomic_compare_and_swap<mode>"
- [(match_operand:SI 0 "gpc_reg_operand" "") ;; bool out
- (match_operand:INT1 1 "gpc_reg_operand" "") ;; val out
- (match_operand:INT1 2 "memory_operand" "") ;; memory
- (match_operand:INT1 3 "reg_or_short_operand" "") ;; expected
- (match_operand:INT1 4 "gpc_reg_operand" "") ;; desired
+ [(match_operand:SI 0 "int_reg_operand" "") ;; bool out
+ (match_operand:AINT 1 "int_reg_operand" "") ;; val out
+ (match_operand:AINT 2 "memory_operand" "") ;; memory
+ (match_operand:AINT 3 "reg_or_short_operand" "") ;; expected
+ (match_operand:AINT 4 "int_reg_operand" "") ;; desired
(match_operand:SI 5 "const_int_operand" "") ;; is_weak
(match_operand:SI 6 "const_int_operand" "") ;; model succ
(match_operand:SI 7 "const_int_operand" "")] ;; model fail
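With TARGET_SYNC_TI these expanders accept __int128_t operands, so a 128-bit compare-and-swap should map onto an lqarx/stqcx. loop (the power8 testcase below checks exactly this). A usage sketch; quad_cas is illustrative and not part of the patch:

__int128_t
quad_cas (__int128_t *p, __int128_t expected, __int128_t desired)
{
  __atomic_compare_exchange_n (p, &expected, desired, 0 /* strong */,
			       __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  return expected;
}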
@@ -209,9 +283,9 @@ (define_expand "atomic_compare_and_swap<
})
(define_expand "atomic_exchange<mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (match_operand:INT1 2 "gpc_reg_operand" "") ;; input
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (match_operand:AINT 2 "int_reg_operand" "") ;; input
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
@@ -220,9 +294,9 @@ (define_expand "atomic_exchange<mode>"
})
(define_expand "atomic_<fetchop_name><mode>"
- [(match_operand:INT1 0 "memory_operand" "") ;; memory
- (FETCHOP:INT1 (match_dup 0)
- (match_operand:INT1 1 "<fetchop_pred>" "")) ;; operand
+ [(match_operand:AINT 0 "memory_operand" "") ;; memory
+ (FETCHOP:AINT (match_dup 0)
+ (match_operand:AINT 1 "<fetchop_pred>" "")) ;; operand
(match_operand:SI 2 "const_int_operand" "")] ;; model
""
{
@@ -232,8 +306,8 @@ (define_expand "atomic_<fetchop_name><mo
})
(define_expand "atomic_nand<mode>"
- [(match_operand:INT1 0 "memory_operand" "") ;; memory
- (match_operand:INT1 1 "gpc_reg_operand" "") ;; operand
+ [(match_operand:AINT 0 "memory_operand" "") ;; memory
+ (match_operand:AINT 1 "int_reg_operand" "") ;; operand
(match_operand:SI 2 "const_int_operand" "")] ;; model
""
{
@@ -243,10 +317,10 @@ (define_expand "atomic_nand<mode>"
})
(define_expand "atomic_fetch_<fetchop_name><mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (FETCHOP:INT1 (match_dup 1)
- (match_operand:INT1 2 "<fetchop_pred>" "")) ;; operand
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (FETCHOP:AINT (match_dup 1)
+ (match_operand:AINT 2 "<fetchop_pred>" "")) ;; operand
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
@@ -256,9 +330,9 @@ (define_expand "atomic_fetch_<fetchop_na
})
(define_expand "atomic_fetch_nand<mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (match_operand:INT1 2 "gpc_reg_operand" "") ;; operand
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (match_operand:AINT 2 "int_reg_operand" "") ;; operand
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
@@ -268,10 +342,10 @@ (define_expand "atomic_fetch_nand<mode>"
})
(define_expand "atomic_<fetchop_name>_fetch<mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (FETCHOP:INT1 (match_dup 1)
- (match_operand:INT1 2 "<fetchop_pred>" "")) ;; operand
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (FETCHOP:AINT (match_dup 1)
+ (match_operand:AINT 2 "<fetchop_pred>" "")) ;; operand
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
@@ -281,9 +355,9 @@ (define_expand "atomic_<fetchop_name>_fe
})
(define_expand "atomic_nand_fetch<mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (match_operand:INT1 2 "gpc_reg_operand" "") ;; operand
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (match_operand:AINT 2 "int_reg_operand" "") ;; operand
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
===================================================================
@@ -239,6 +239,12 @@ (define_mode_iterator INT1 [QI HI SI (DI
; extend modes for DImode
(define_mode_iterator QHSI [QI HI SI])
+; QImode or HImode for small atomic ops
+(define_mode_iterator QHI [QI HI])
+
+; HImode or SImode for sign extended fusion ops
+(define_mode_iterator HSI [HI SI])
+
; SImode or DImode, even if DImode doesn't fit in GPRs.
(define_mode_iterator SDI [SI DI])
===================================================================
@@ -0,0 +1,207 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-mcpu=power7 -O2" } */
+/* { dg-final { scan-assembler-not "lbarx" } } */
+/* { dg-final { scan-assembler-not "lharx" } } */
+/* { dg-final { scan-assembler-times "lwarx" 18 } } */
+/* { dg-final { scan-assembler-times "ldarx" 6 } } */
+/* { dg-final { scan-assembler-not "lqarx" } } */
+/* { dg-final { scan-assembler-not "stbcx" } } */
+/* { dg-final { scan-assembler-not "sthcx" } } */
+/* { dg-final { scan-assembler-times "stwcx" 18 } } */
+/* { dg-final { scan-assembler-times "stdcx" 6 } } */
+/* { dg-final { scan-assembler-not "stqcx" } } */
+/* { dg-final { scan-assembler-times "bl __atomic" 6 } } */
+/* { dg-final { scan-assembler-times "isync" 12 } } */
+/* { dg-final { scan-assembler-times "lwsync" 8 } } */
+/* { dg-final { scan-assembler-not "mtvsrd" } } */
+/* { dg-final { scan-assembler-not "mtvsrwa" } } */
+/* { dg-final { scan-assembler-not "mtvsrwz" } } */
+/* { dg-final { scan-assembler-not "mfvsrd" } } */
+/* { dg-final { scan-assembler-not "mfvsrwz" } } */
+
+/* Test for the byte atomic operations on power7 using lwarx/stwcx.  */
+char
+char_fetch_add_relaxed (char *ptr, int value)
+{
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
+}
+
+char
+char_fetch_sub_consume (char *ptr, int value)
+{
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
+}
+
+char
+char_fetch_and_acquire (char *ptr, int value)
+{
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+char
+char_fetch_ior_release (char *ptr, int value)
+{
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
+}
+
+char
+char_fetch_xor_acq_rel (char *ptr, int value)
+{
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
+}
+
+char
+char_fetch_nand_seq_cst (char *ptr, int value)
+{
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
+}
+
+/* Test for the half word atomic operations on power7 using lwarx/stwcx.  */
+short
+short_fetch_add_relaxed (short *ptr, int value)
+{
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
+}
+
+short
+short_fetch_sub_consume (short *ptr, int value)
+{
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
+}
+
+short
+short_fetch_and_acquire (short *ptr, int value)
+{
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+short
+short_fetch_ior_release (short *ptr, int value)
+{
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
+}
+
+short
+short_fetch_xor_acq_rel (short *ptr, int value)
+{
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
+}
+
+short
+short_fetch_nand_seq_cst (short *ptr, int value)
+{
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
+}
+
+/* Test for the word atomic operations on power7 using lwarx/stwcx.  */
+int
+int_fetch_add_relaxed (int *ptr, int value)
+{
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
+}
+
+int
+int_fetch_sub_consume (int *ptr, int value)
+{
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
+}
+
+int
+int_fetch_and_acquire (int *ptr, int value)
+{
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+int
+int_fetch_ior_release (int *ptr, int value)
+{
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
+}
+
+int
+int_fetch_xor_acq_rel (int *ptr, int value)
+{
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
+}
+
+int
+int_fetch_nand_seq_cst (int *ptr, int value)
+{
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
+}
+
+/* Test for the double word atomic operations on power7 using ldarx/stdcx.  */
+long
+long_fetch_add_relaxed (long *ptr, long value)
+{
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
+}
+
+long
+long_fetch_sub_consume (long *ptr, long value)
+{
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
+}
+
+long
+long_fetch_and_acquire (long *ptr, long value)
+{
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+long
+long_fetch_ior_release (long *ptr, long value)
+{
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
+}
+
+long
+long_fetch_xor_acq_rel (long *ptr, long value)
+{
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
+}
+
+long
+long_fetch_nand_seq_cst (long *ptr, long value)
+{
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
+}
+
+/* Test for the quad word atomic operations on power7, which call the
+   __atomic library functions.  */
+__int128_t
+quad_fetch_add_relaxed (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
+}
+
+__int128_t
+quad_fetch_sub_consume (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
+}
+
+__int128_t
+quad_fetch_and_acquire (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+__int128_t
+quad_fetch_ior_release (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
+}
+
+__int128_t
+quad_fetch_xor_acq_rel (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
+}
+
+__int128_t
+quad_fetch_nand_seq_cst (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
+}
===================================================================
@@ -0,0 +1,237 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mcpu=power8 -O2" } */
+/* { dg-final { scan-assembler-times "lbarx" 7 } } */
+/* { dg-final { scan-assembler-times "lharx" 7 } } */
+/* { dg-final { scan-assembler-times "lwarx" 7 } } */
+/* { dg-final { scan-assembler-times "ldarx" 7 } } */
+/* { dg-final { scan-assembler-times "lqarx" 7 } } */
+/* { dg-final { scan-assembler-times "stbcx" 7 } } */
+/* { dg-final { scan-assembler-times "sthcx" 7 } } */
+/* { dg-final { scan-assembler-times "stwcx" 7 } } */
+/* { dg-final { scan-assembler-times "stdcx" 7 } } */
+/* { dg-final { scan-assembler-times "stqcx" 7 } } */
+/* { dg-final { scan-assembler-not "bl __atomic" } } */
+/* { dg-final { scan-assembler-times "isync" 20 } } */
+/* { dg-final { scan-assembler-times "lwsync" 10 } } */
+/* { dg-final { scan-assembler-not "mtvsrd" } } */
+/* { dg-final { scan-assembler-not "mtvsrwa" } } */
+/* { dg-final { scan-assembler-not "mtvsrwz" } } */
+/* { dg-final { scan-assembler-not "mfvsrd" } } */
+/* { dg-final { scan-assembler-not "mfvsrwz" } } */
+
+/* Test for the byte atomic operations on power8 using lbarx/stbcx. */
+char
+char_fetch_add_relaxed (char *ptr, int value)
+{
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
+}
+
+char
+char_fetch_sub_consume (char *ptr, int value)
+{
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
+}
+
+char
+char_fetch_and_acquire (char *ptr, int value)
+{
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+char
+char_fetch_ior_release (char *ptr, int value)
+{
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
+}
+
+char
+char_fetch_xor_acq_rel (char *ptr, int value)
+{
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
+}
+
+char
+char_fetch_nand_seq_cst (char *ptr, int value)
+{
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
+}
+
+void
+char_val_compare_and_swap (char *p, int i, int j, char *q)
+{
+ *q = __sync_val_compare_and_swap (p, i, j);
+}
+
+/* Test for the half word atomic operations on power8 using lharx/sthcx. */
+short
+short_fetch_add_relaxed (short *ptr, int value)
+{
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
+}
+
+short
+short_fetch_sub_consume (short *ptr, int value)
+{
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
+}
+
+short
+short_fetch_and_acquire (short *ptr, int value)
+{
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+short
+short_fetch_ior_release (short *ptr, int value)
+{
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
+}
+
+short
+short_fetch_xor_acq_rel (short *ptr, int value)
+{
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
+}
+
+short
+short_fetch_nand_seq_cst (short *ptr, int value)
+{
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
+}
+
+void
+short_val_compare_and_swap (short *p, int i, int j, short *q)
+{
+ *q = __sync_val_compare_and_swap (p, i, j);
+}
+
+/* Test for the word atomic operations on power8 using lwarx/stwcx. */
+int
+int_fetch_add_relaxed (int *ptr, int value)
+{
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
+}
+
+int
+int_fetch_sub_consume (int *ptr, int value)
+{
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
+}
+
+int
+int_fetch_and_acquire (int *ptr, int value)
+{
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+int
+int_fetch_ior_release (int *ptr, int value)
+{
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
+}
+
+int
+int_fetch_xor_acq_rel (int *ptr, int value)
+{
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
+}
+
+int
+int_fetch_nand_seq_cst (int *ptr, int value)
+{
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
+}
+
+void
+int_val_compare_and_swap (int *p, int i, int j, int *q)
+{
+ *q = __sync_val_compare_and_swap (p, i, j);
+}
+
+/* Test for the double word atomic operations on power8 using ldarx/stdcx. */
+long
+long_fetch_add_relaxed (long *ptr, long value)
+{
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
+}
+
+long
+long_fetch_sub_consume (long *ptr, long value)
+{
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
+}
+
+long
+long_fetch_and_acquire (long *ptr, long value)
+{
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+long
+long_fetch_ior_release (long *ptr, long value)
+{
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
+}
+
+long
+long_fetch_xor_acq_rel (long *ptr, long value)
+{
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
+}
+
+long
+long_fetch_nand_seq_cst (long *ptr, long value)
+{
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
+}
+
+void
+long_val_compare_and_swap (long *p, long i, long j, long *q)
+{
+ *q = __sync_val_compare_and_swap (p, i, j);
+}
+
+/* Test for the quad word atomic operations on power8 using lqarx/stqcx.  */
+__int128_t
+quad_fetch_add_relaxed (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_add (ptr, value, __ATOMIC_RELAXED);
+}
+
+__int128_t
+quad_fetch_sub_consume (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_sub (ptr, value, __ATOMIC_CONSUME);
+}
+
+__int128_t
+quad_fetch_and_acquire (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_and (ptr, value, __ATOMIC_ACQUIRE);
+}
+
+__int128_t
+quad_fetch_ior_release (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_or (ptr, value, __ATOMIC_RELEASE);
+}
+
+__int128_t
+quad_fetch_xor_acq_rel (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_xor (ptr, value, __ATOMIC_ACQ_REL);
+}
+
+__int128_t
+quad_fetch_nand_seq_cst (__int128_t *ptr, __int128_t value)
+{
+ return __atomic_fetch_nand (ptr, value, __ATOMIC_SEQ_CST);
+}
+
+void
+quad_val_compare_and_swap (__int128_t *p, __int128_t i, __int128_t j, __int128_t *q)
+{
+ *q = __sync_val_compare_and_swap (p, i, j);
+}