From patchwork Wed Nov 23 23:43:15 2011
X-Patchwork-Submitter: Richard Henderson
X-Patchwork-Id: 127404
Message-ID: <4ECD8513.502@redhat.com>
Date: Wed, 23 Nov 2011 15:43:15 -0800
From: Richard Henderson
To: GCC Patches , richard.earnshaw@arm.com, ramana.radhakrishnan@arm.com
Subject: [ARM] Rewrite ldrex/strex support for atomic optabs

This transformation is quite a bit more dramatic than in the other ports,
because ARM was not splitting the code sequences post-reload.  Indeed, the
failure to split resulted in a distinctly odd coding style in which fake
output routines were used to compute the length of the code sequence.

This all seemed highly sub-optimal, so I rewrote everything from scratch.

This has passed an initial sniff test with a cross-compiler, and is
undergoing full testing on an armv7 host.  Hopefully results will be
ready tomorrow.  In the meantime, please review.


r~

commit 1eb69240aca0dc6e2f6703ce406d22ceafc26605
Author: Richard Henderson
Date:   Wed Nov 23 12:48:03 2011 -0800

    arm: Convert to atomic optabs.  At the same time, perform
    post-reload splitting.
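For readers who have not followed the optab conversion: the expanders and
post-reload splitters in the diff below implement GCC's __atomic built-ins
directly.  The following C fragment is a minimal illustration of the kind of
source that exercises them; the names "counter", "bump" and "try_update" are
invented for this example and are not part of the patch.

/* Minimal illustration, not part of the patch: source-level operations
   that map onto the new atomic_* patterns in sync.md.  */

#include <stdint.h>
#include <stdbool.h>

int32_t counter;

int32_t
bump (void)
{
  /* Expands via the atomic_fetch_addsi pattern; the ldrex/strex retry
     loop is emitted by arm_split_atomic_op only after reload, so no
     spill can land between the exclusive load and the exclusive store.  */
  return __atomic_fetch_add (&counter, 1, __ATOMIC_SEQ_CST);
}

bool
try_update (int32_t *p, int32_t expected, int32_t desired)
{
  /* Expands via atomic_compare_and_swapsi, using
     arm_expand_compare_and_swap and arm_split_compare_and_swap.  */
  return __atomic_compare_exchange_n (p, &expected, desired,
                                      /*weak=*/false,
                                      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}
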
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 23a29c6..8774c8c 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -155,12 +155,11 @@ extern const char *vfp_output_fstmd (rtx *); extern void arm_set_return_address (rtx, rtx); extern int arm_eliminable_register (rtx); extern const char *arm_output_shift(rtx *, int); -extern void arm_expand_sync (enum machine_mode, struct arm_sync_generator *, - rtx, rtx, rtx, rtx); -extern const char *arm_output_memory_barrier (rtx *); -extern const char *arm_output_sync_insn (rtx, rtx *); extern unsigned int arm_sync_loop_insns (rtx , rtx *); extern int arm_attr_length_push_multi(rtx, rtx); +extern void arm_expand_compare_and_swap (rtx op[]); +extern void arm_split_compare_and_swap (rtx op[]); +extern void arm_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx); #if defined TREE_CODE extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index d3d7454..df296af 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -24268,520 +24268,6 @@ arm_have_conditional_execution (void) return !TARGET_THUMB1; } -/* Legitimize a memory reference for sync primitive implemented using - ldrex / strex. We currently force the form of the reference to be - indirect without offset. We do not yet support the indirect offset - addressing supported by some ARM targets for these - instructions. */ -static rtx -arm_legitimize_sync_memory (rtx memory) -{ - rtx addr = force_reg (Pmode, XEXP (memory, 0)); - rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr); - - set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER); - MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory); - return legitimate_memory; -} - -/* An instruction emitter. */ -typedef void (* emit_f) (int label, const char *, rtx *); - -/* An instruction emitter that emits via the conventional - output_asm_insn. */ -static void -arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands) -{ - output_asm_insn (pattern, operands); -} - -/* Count the number of emitted synchronization instructions. */ -static unsigned arm_insn_count; - -/* An emitter that counts emitted instructions but does not actually - emit instruction into the instruction stream. */ -static void -arm_count (int label, - const char *pattern ATTRIBUTE_UNUSED, - rtx *operands ATTRIBUTE_UNUSED) -{ - if (! label) - ++ arm_insn_count; -} - -/* Construct a pattern using conventional output formatting and feed - it to output_asm_insn. Provides a mechanism to construct the - output pattern on the fly. Note the hard limit on the pattern - buffer size. */ -static void ATTRIBUTE_PRINTF_4 -arm_output_asm_insn (emit_f emit, int label, rtx *operands, - const char *pattern, ...) -{ - va_list ap; - char buffer[256]; - - va_start (ap, pattern); - vsprintf (buffer, pattern, ap); - va_end (ap); - emit (label, buffer, operands); -} - -/* Emit the memory barrier instruction, if any, provided by this - target to a specified emitter. */ -static void -arm_process_output_memory_barrier (emit_f emit, rtx *operands) -{ - if (TARGET_HAVE_DMB) - { - /* Note we issue a system level barrier. We should consider - issuing a inner shareabilty zone barrier here instead, ie. - "DMB ISH". 
*/ - emit (0, "dmb\tsy", operands); - return; - } - - if (TARGET_HAVE_DMB_MCR) - { - emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands); - return; - } - - gcc_unreachable (); -} - -/* Emit the memory barrier instruction, if any, provided by this - target. */ -const char * -arm_output_memory_barrier (rtx *operands) -{ - arm_process_output_memory_barrier (arm_emit, operands); - return ""; -} - -/* Helper to figure out the instruction suffix required on ldrex/strex - for operations on an object of the specified mode. */ -static const char * -arm_ldrex_suffix (enum machine_mode mode) -{ - switch (mode) - { - case QImode: return "b"; - case HImode: return "h"; - case SImode: return ""; - case DImode: return "d"; - default: - gcc_unreachable (); - } - return ""; -} - -/* Emit an ldrex{b,h,d, } instruction appropriate for the specified - mode. */ -static void -arm_output_ldrex (emit_f emit, - enum machine_mode mode, - rtx target, - rtx memory) -{ - rtx operands[3]; - - operands[0] = target; - if (mode != DImode) - { - const char *suffix = arm_ldrex_suffix (mode); - operands[1] = memory; - arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix); - } - else - { - /* The restrictions on target registers in ARM mode are that the two - registers are consecutive and the first one is even; Thumb is - actually more flexible, but DI should give us this anyway. - Note that the 1st register always gets the lowest word in memory. */ - gcc_assert ((REGNO (target) & 1) == 0); - operands[1] = gen_rtx_REG (SImode, REGNO (target) + 1); - operands[2] = memory; - arm_output_asm_insn (emit, 0, operands, "ldrexd\t%%0, %%1, %%C2"); - } -} - -/* Emit a strex{b,h,d, } instruction appropriate for the specified - mode. */ -static void -arm_output_strex (emit_f emit, - enum machine_mode mode, - const char *cc, - rtx result, - rtx value, - rtx memory) -{ - rtx operands[4]; - - operands[0] = result; - operands[1] = value; - if (mode != DImode) - { - const char *suffix = arm_ldrex_suffix (mode); - operands[2] = memory; - arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", - suffix, cc); - } - else - { - /* The restrictions on target registers in ARM mode are that the two - registers are consecutive and the first one is even; Thumb is - actually more flexible, but DI should give us this anyway. - Note that the 1st register always gets the lowest word in memory. */ - gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2); - operands[2] = gen_rtx_REG (SImode, REGNO (value) + 1); - operands[3] = memory; - arm_output_asm_insn (emit, 0, operands, "strexd%s\t%%0, %%1, %%2, %%C3", - cc); - } -} - -/* Helper to emit an it instruction in Thumb2 mode only; although the assembler - will ignore it in ARM mode, emitting it will mess up instruction counts we - sometimes keep 'flags' are the extra t's and e's if it's more than one - instruction that is conditional. */ -static void -arm_output_it (emit_f emit, const char *flags, const char *cond) -{ - rtx operands[1]; /* Don't actually use the operand. */ - if (TARGET_THUMB2) - arm_output_asm_insn (emit, 0, operands, "it%s\t%s", flags, cond); -} - -/* Helper to emit a two operand instruction. */ -static void -arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s) -{ - rtx operands[2]; - - operands[0] = d; - operands[1] = s; - arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic); -} - -/* Helper to emit a three operand instruction. 
*/ -static void -arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b) -{ - rtx operands[3]; - - operands[0] = d; - operands[1] = a; - operands[2] = b; - arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic); -} - -/* Emit a load store exclusive synchronization loop. - - do - old_value = [mem] - if old_value != required_value - break; - t1 = sync_op (old_value, new_value) - [mem] = t1, t2 = [0|1] - while ! t2 - - Note: - t1 == t2 is not permitted - t1 == old_value is permitted - - required_value: - - RTX register representing the required old_value for - the modify to continue, if NULL no comparsion is performed. */ -static void -arm_output_sync_loop (emit_f emit, - enum machine_mode mode, - rtx old_value, - rtx memory, - rtx required_value, - rtx new_value, - rtx t1, - rtx t2, - enum attr_sync_op sync_op, - int early_barrier_required) -{ - rtx operands[2]; - /* We'll use the lo for the normal rtx in the none-DI case - as well as the least-sig word in the DI case. */ - rtx old_value_lo, required_value_lo, new_value_lo, t1_lo; - rtx old_value_hi, required_value_hi, new_value_hi, t1_hi; - - bool is_di = mode == DImode; - - gcc_assert (t1 != t2); - - if (early_barrier_required) - arm_process_output_memory_barrier (emit, NULL); - - arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX); - - arm_output_ldrex (emit, mode, old_value, memory); - - if (is_di) - { - old_value_lo = gen_lowpart (SImode, old_value); - old_value_hi = gen_highpart (SImode, old_value); - if (required_value) - { - required_value_lo = gen_lowpart (SImode, required_value); - required_value_hi = gen_highpart (SImode, required_value); - } - else - { - /* Silence false potentially unused warning. */ - required_value_lo = NULL_RTX; - required_value_hi = NULL_RTX; - } - new_value_lo = gen_lowpart (SImode, new_value); - new_value_hi = gen_highpart (SImode, new_value); - t1_lo = gen_lowpart (SImode, t1); - t1_hi = gen_highpart (SImode, t1); - } - else - { - old_value_lo = old_value; - new_value_lo = new_value; - required_value_lo = required_value; - t1_lo = t1; - - /* Silence false potentially unused warning. */ - t1_hi = NULL_RTX; - new_value_hi = NULL_RTX; - required_value_hi = NULL_RTX; - old_value_hi = NULL_RTX; - } - - if (required_value) - { - operands[0] = old_value_lo; - operands[1] = required_value_lo; - - arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1"); - if (is_di) - { - arm_output_it (emit, "", "eq"); - arm_output_op2 (emit, "cmpeq", old_value_hi, required_value_hi); - } - arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX); - } - - switch (sync_op) - { - case SYNC_OP_ADD: - arm_output_op3 (emit, is_di ? "adds" : "add", - t1_lo, old_value_lo, new_value_lo); - if (is_di) - arm_output_op3 (emit, "adc", t1_hi, old_value_hi, new_value_hi); - break; - - case SYNC_OP_SUB: - arm_output_op3 (emit, is_di ? 
"subs" : "sub", - t1_lo, old_value_lo, new_value_lo); - if (is_di) - arm_output_op3 (emit, "sbc", t1_hi, old_value_hi, new_value_hi); - break; - - case SYNC_OP_IOR: - arm_output_op3 (emit, "orr", t1_lo, old_value_lo, new_value_lo); - if (is_di) - arm_output_op3 (emit, "orr", t1_hi, old_value_hi, new_value_hi); - break; - - case SYNC_OP_XOR: - arm_output_op3 (emit, "eor", t1_lo, old_value_lo, new_value_lo); - if (is_di) - arm_output_op3 (emit, "eor", t1_hi, old_value_hi, new_value_hi); - break; - - case SYNC_OP_AND: - arm_output_op3 (emit,"and", t1_lo, old_value_lo, new_value_lo); - if (is_di) - arm_output_op3 (emit, "and", t1_hi, old_value_hi, new_value_hi); - break; - - case SYNC_OP_NAND: - arm_output_op3 (emit, "and", t1_lo, old_value_lo, new_value_lo); - if (is_di) - arm_output_op3 (emit, "and", t1_hi, old_value_hi, new_value_hi); - arm_output_op2 (emit, "mvn", t1_lo, t1_lo); - if (is_di) - arm_output_op2 (emit, "mvn", t1_hi, t1_hi); - break; - - case SYNC_OP_NONE: - t1 = new_value; - t1_lo = new_value_lo; - if (is_di) - t1_hi = new_value_hi; - break; - } - - /* Note that the result of strex is a 0/1 flag that's always 1 register. */ - if (t2) - { - arm_output_strex (emit, mode, "", t2, t1, memory); - operands[0] = t2; - arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0"); - arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", - LOCAL_LABEL_PREFIX); - } - else - { - /* Use old_value for the return value because for some operations - the old_value can easily be restored. This saves one register. */ - arm_output_strex (emit, mode, "", old_value_lo, t1, memory); - operands[0] = old_value_lo; - arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0"); - arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", - LOCAL_LABEL_PREFIX); - - /* Note that we only used the _lo half of old_value as a temporary - so in DI we don't have to restore the _hi part. */ - switch (sync_op) - { - case SYNC_OP_ADD: - arm_output_op3 (emit, "sub", old_value_lo, t1_lo, new_value_lo); - break; - - case SYNC_OP_SUB: - arm_output_op3 (emit, "add", old_value_lo, t1_lo, new_value_lo); - break; - - case SYNC_OP_XOR: - arm_output_op3 (emit, "eor", old_value_lo, t1_lo, new_value_lo); - break; - - case SYNC_OP_NONE: - arm_output_op2 (emit, "mov", old_value_lo, required_value_lo); - break; - - default: - gcc_unreachable (); - } - } - - /* Note: label is before barrier so that in cmp failure case we still get - a barrier to stop subsequent loads floating upwards past the ldrex - PR target/48126. */ - arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX); - arm_process_output_memory_barrier (emit, NULL); -} - -static rtx -arm_get_sync_operand (rtx *operands, int index, rtx default_value) -{ - if (index > 0) - default_value = operands[index - 1]; - - return default_value; -} - -#define FETCH_SYNC_OPERAND(NAME, DEFAULT) \ - arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT); - -/* Extract the operands for a synchroniztion instruction from the - instructions attributes and emit the instruction. 
*/ -static void -arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands) -{ - rtx result, memory, required_value, new_value, t1, t2; - int early_barrier; - enum machine_mode mode; - enum attr_sync_op sync_op; - - result = FETCH_SYNC_OPERAND(result, 0); - memory = FETCH_SYNC_OPERAND(memory, 0); - required_value = FETCH_SYNC_OPERAND(required_value, 0); - new_value = FETCH_SYNC_OPERAND(new_value, 0); - t1 = FETCH_SYNC_OPERAND(t1, 0); - t2 = FETCH_SYNC_OPERAND(t2, 0); - early_barrier = - get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES; - sync_op = get_attr_sync_op (insn); - mode = GET_MODE (memory); - - arm_output_sync_loop (emit, mode, result, memory, required_value, - new_value, t1, t2, sync_op, early_barrier); -} - -/* Emit a synchronization instruction loop. */ -const char * -arm_output_sync_insn (rtx insn, rtx *operands) -{ - arm_process_output_sync_insn (arm_emit, insn, operands); - return ""; -} - -/* Count the number of machine instruction that will be emitted for a - synchronization instruction. Note that the emitter used does not - emit instructions, it just counts instructions being carefull not - to count labels. */ -unsigned int -arm_sync_loop_insns (rtx insn, rtx *operands) -{ - arm_insn_count = 0; - arm_process_output_sync_insn (arm_count, insn, operands); - return arm_insn_count; -} - -/* Helper to call a target sync instruction generator, dealing with - the variation in operands required by the different generators. */ -static rtx -arm_call_generator (struct arm_sync_generator *generator, rtx old_value, - rtx memory, rtx required_value, rtx new_value) -{ - switch (generator->op) - { - case arm_sync_generator_omn: - gcc_assert (! required_value); - return generator->u.omn (old_value, memory, new_value); - - case arm_sync_generator_omrn: - gcc_assert (required_value); - return generator->u.omrn (old_value, memory, required_value, new_value); - } - - return NULL; -} - -/* Expand a synchronization loop. The synchronization loop is expanded - as an opaque block of instructions in order to ensure that we do - not subsequently get extraneous memory accesses inserted within the - critical region. The exclusive access property of ldrex/strex is - only guaranteed in there are no intervening memory accesses. */ -void -arm_expand_sync (enum machine_mode mode, - struct arm_sync_generator *generator, - rtx target, rtx memory, rtx required_value, rtx new_value) -{ - if (target == NULL) - target = gen_reg_rtx (mode); - - memory = arm_legitimize_sync_memory (memory); - if (mode != SImode && mode != DImode) - { - rtx load_temp = gen_reg_rtx (SImode); - - if (required_value) - required_value = convert_modes (SImode, mode, required_value, true); - - new_value = convert_modes (SImode, mode, new_value, true); - emit_insn (arm_call_generator (generator, load_temp, memory, - required_value, new_value)); - emit_move_insn (target, gen_lowpart (mode, load_temp)); - } - else - { - emit_insn (arm_call_generator (generator, target, memory, required_value, - new_value)); - } -} - static unsigned int arm_autovectorize_vector_sizes (void) { @@ -24979,4 +24465,287 @@ arm_count_output_move_double_insns (rtx *operands) return count; } +/* Emit a memory barrier around an atomic sequence according to MODEL. 
*/ + +static void +arm_pre_atomic_barrier (enum memmodel model) +{ + switch (model) + { + case MEMMODEL_RELAXED: + case MEMMODEL_CONSUME: + case MEMMODEL_ACQUIRE: + break; + case MEMMODEL_RELEASE: + case MEMMODEL_ACQ_REL: + case MEMMODEL_SEQ_CST: + emit_insn (gen_memory_barrier ()); + break; + default: + gcc_unreachable (); + } +} + +static void +arm_post_atomic_barrier (enum memmodel model) +{ + switch (model) + { + case MEMMODEL_RELAXED: + case MEMMODEL_CONSUME: + case MEMMODEL_RELEASE: + break; + case MEMMODEL_ACQUIRE: + case MEMMODEL_ACQ_REL: + case MEMMODEL_SEQ_CST: + emit_insn (gen_memory_barrier ()); + break; + default: + gcc_unreachable (); + } +} + +/* Emit the load-exclusive and store-exclusive instructions. */ + +static void +arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem) +{ + rtx (*gen) (rtx, rtx); + + switch (mode) + { + case QImode: gen = gen_arm_load_exclusiveqi; break; + case HImode: gen = gen_arm_load_exclusivehi; break; + case SImode: gen = gen_arm_load_exclusivesi; break; + case DImode: gen = gen_arm_load_exclusivedi; break; + default: + gcc_unreachable (); + } + + emit_insn (gen (rval, mem)); +} + +static void +arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem) +{ + rtx (*gen) (rtx, rtx, rtx); + + switch (mode) + { + case QImode: gen = gen_arm_store_exclusiveqi; break; + case HImode: gen = gen_arm_store_exclusivehi; break; + case SImode: gen = gen_arm_store_exclusivesi; break; + case DImode: gen = gen_arm_store_exclusivedi; break; + default: + gcc_unreachable (); + } + + emit_insn (gen (bval, rval, mem)); +} + +/* Mark the previous jump instruction as unlikely. */ + +static void +emit_unlikely_jump (rtx insn) +{ + rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1); + + insn = emit_jump_insn (insn); + add_reg_note (insn, REG_BR_PROB, very_unlikely); +} + +/* Expand a compare and swap pattern. */ + +void +arm_expand_compare_and_swap (rtx operands[]) +{ + rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f; + enum machine_mode mode; + rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx); + + bval = operands[0]; + rval = operands[1]; + mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + is_weak = operands[5]; + mod_s = operands[6]; + mod_f = operands[7]; + mode = GET_MODE (mem); + + switch (mode) + { + case QImode: + case HImode: + /* For narrow modes, we're going to perform the comparison in SImode, + so do the zero-extension now. */ + rval = gen_reg_rtx (SImode); + oldval = convert_modes (SImode, mode, oldval, true); + /* FALLTHRU */ + + case SImode: + /* Force the value into a register if needed. We waited until after + the zero-extension above to do this properly. */ + if (!arm_add_operand (oldval, mode)) + oldval = force_reg (mode, oldval); + break; + + case DImode: + if (!cmpdi_operand (oldval, mode)) + oldval = force_reg (mode, oldval); + break; + + default: + gcc_unreachable (); + } + + switch (mode) + { + case QImode: gen = gen_atomic_compare_and_swapqi_1; break; + case HImode: gen = gen_atomic_compare_and_swaphi_1; break; + case SImode: gen = gen_atomic_compare_and_swapsi_1; break; + case DImode: gen = gen_atomic_compare_and_swapdi_1; break; + default: + gcc_unreachable (); + } + + emit_insn (gen (bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f)); + + if (mode == QImode || mode == HImode) + emit_move_insn (operands[1], gen_lowpart (mode, rval)); +} + +/* Split a compare and swap pattern. 
It is IMPLEMENTATION DEFINED whether + another memory store between the load-exclusive and store-exclusive can + reset the monitor from Exclusive to Open state. This means we must wait + until after reload to split the pattern, lest we get a register spill in + the middle of the atomic sequence. */ + +void +arm_split_compare_and_swap (rtx operands[]) +{ + rtx bval, rval, mem, oldval, newval; + enum machine_mode mode; + enum memmodel mod_s, mod_f; + bool is_weak; + rtx label1, label2, x, cond; + + bval = operands[0]; + rval = operands[1]; + mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + is_weak = (operands[5] != const0_rtx); + mod_s = (enum memmodel) INTVAL (operands[6]); + mod_f = (enum memmodel) INTVAL (operands[7]); + mode = GET_MODE (mem); + + emit_move_insn (bval, const0_rtx); + + arm_pre_atomic_barrier (mod_s); + + label1 = NULL_RTX; + if (!is_weak) + { + label1 = gen_label_rtx (); + emit_label (label1); + } + label2 = gen_label_rtx (); + + arm_emit_load_exclusive (mode, rval, mem); + + x = gen_rtx_NE (VOIDmode, rval, oldval); + if (mode == DImode) + x = gen_cbranchdi4 (x, rval, oldval, label2); + else + x = gen_cbranchsi4 (x, rval, oldval, label2); + emit_unlikely_jump (x); + + arm_emit_store_exclusive (mode, bval, mem, newval); + + /* Thumb1 does not have LDREX, so we do not need to consider that + when it comes to computing the below. */ + gcc_assert (TARGET_32BIT); + + if (is_weak) + emit_insn (gen_xorsi3 (bval, bval, const1_rtx)); + else + { + emit_insn (gen_xorsi3_compare0 (bval, bval, const1_rtx)); + + cond = gen_rtx_REG (CC_NOOVmode, CC_REGNUM); + x = gen_rtx_EQ (VOIDmode, cond, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (Pmode, label1), pc_rtx); + emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); + } + + if (mod_f != MEMMODEL_RELAXED) + emit_label (label2); + + arm_post_atomic_barrier (mod_s); + + if (mod_f == MEMMODEL_RELAXED) + emit_label (label2); +} + +void +arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, + rtx value, rtx model_rtx, rtx cond) +{ + enum memmodel model = (enum memmodel) INTVAL (model_rtx); + enum machine_mode mode = GET_MODE (mem); + enum machine_mode wmode = (mode == DImode ? 
DImode : SImode); + rtx label, x; + + arm_pre_atomic_barrier (model); + + label = gen_label_rtx (); + emit_label (label); + + if (new_out) + new_out = gen_lowpart (wmode, new_out); + if (old_out) + old_out = gen_lowpart (wmode, old_out); + else + old_out = new_out; + value = simplify_gen_subreg (wmode, value, mode, 0); + + arm_emit_load_exclusive (mode, old_out, mem); + + switch (code) + { + case SET: + new_out = value; + break; + + case NOT: + x = gen_rtx_AND (wmode, old_out, value); + emit_insn (gen_rtx_SET (VOIDmode, new_out, x)); + x = gen_rtx_NOT (wmode, new_out); + emit_insn (gen_rtx_SET (VOIDmode, new_out, x)); + break; + + case MINUS: + if (CONST_INT_P (value)) + { + value = GEN_INT (-INTVAL (value)); + code = PLUS; + } + /* FALLTHRU */ + + default: + x = gen_rtx_fmt_ee (code, wmode, old_out, value); + emit_insn (gen_rtx_SET (VOIDmode, new_out, x)); + break; + } + + arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out)); + + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label)); + + arm_post_atomic_barrier (model); +} + #include "gt-arm.h" diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 85e2b99..31f4856 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -123,24 +123,6 @@ enum target_cpus /* The processor for which instructions should be scheduled. */ extern enum processor_type arm_tune; -enum arm_sync_generator_tag - { - arm_sync_generator_omn, - arm_sync_generator_omrn - }; - -/* Wrapper to pass around a polymorphic pointer to a sync instruction - generator and. */ -struct arm_sync_generator -{ - enum arm_sync_generator_tag op; - union - { - rtx (* omn) (rtx, rtx, rtx); - rtx (* omrn) (rtx, rtx, rtx, rtx); - } u; -}; - typedef enum arm_cond_code { ARM_EQ = 0, ARM_NE, ARM_CS, ARM_CC, ARM_MI, ARM_PL, ARM_VS, ARM_VC, diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index a78ba88..93b3c63 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -151,11 +151,11 @@ VUNSPEC_WCMP_GT ; Used by the iwMMXT WCMPGT instructions VUNSPEC_EH_RETURN ; Use to override the return address for exception ; handling. - VUNSPEC_SYNC_COMPARE_AND_SWAP ; Represent an atomic compare swap. - VUNSPEC_SYNC_LOCK ; Represent a sync_lock_test_and_set. - VUNSPEC_SYNC_OP ; Represent a sync_ - VUNSPEC_SYNC_NEW_OP ; Represent a sync_new_ - VUNSPEC_SYNC_OLD_OP ; Represent a sync_old_ + VUNSPEC_ATOMIC_CAS ; Represent an atomic compare swap. + VUNSPEC_ATOMIC_XCHG ; Represent an atomic exchange. + VUNSPEC_ATOMIC_OP ; Represent an atomic operation. + VUNSPEC_LL ; Represent a load-register-exclusive. + VUNSPEC_SC ; Represent a store-register-exclusive. 
]) ;;--------------------------------------------------------------------------- @@ -185,21 +185,9 @@ (define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp" (const (symbol_ref "arm_fpu_attr"))) -(define_attr "sync_result" "none,0,1,2,3,4,5" (const_string "none")) -(define_attr "sync_memory" "none,0,1,2,3,4,5" (const_string "none")) -(define_attr "sync_required_value" "none,0,1,2,3,4,5" (const_string "none")) -(define_attr "sync_new_value" "none,0,1,2,3,4,5" (const_string "none")) -(define_attr "sync_t1" "none,0,1,2,3,4,5" (const_string "none")) -(define_attr "sync_t2" "none,0,1,2,3,4,5" (const_string "none")) -(define_attr "sync_release_barrier" "yes,no" (const_string "yes")) -(define_attr "sync_op" "none,add,sub,ior,xor,and,nand" - (const_string "none")) - ; LENGTH of an instruction (in bytes) (define_attr "length" "" - (cond [(not (eq_attr "sync_memory" "none")) - (symbol_ref "arm_sync_loop_insns (insn, operands) * 4") - ] (const_int 4))) + (const_int 4)) ; The architecture which supports the instruction (or alternative). ; This can be "a" for ARM, "t" for either of the Thumbs, "32" for @@ -3066,7 +3054,7 @@ [(set_attr "length" "2") (set_attr "conds" "set")]) -(define_insn "*xorsi3_compare0" +(define_insn "xorsi3_compare0" [(set (reg:CC_NOOV CC_REGNUM) (compare:CC_NOOV (xor:SI (match_operand:SI 1 "s_register_operand" "r") (match_operand:SI 2 "arm_rhs_operand" "rI")) diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md index d8ce982..5215337 100644 --- a/gcc/config/arm/constraints.md +++ b/gcc/config/arm/constraints.md @@ -291,6 +291,11 @@ (and (match_code "const_double") (match_test "TARGET_32BIT && TARGET_VFP_DOUBLE && vfp3_const_double_rtx (op)"))) +(define_memory_constraint "Ua" + "@internal + An address valid for loading/storing register exclusive" + (match_operand 0 "mem_noofs_operand")) + (define_memory_constraint "Ut" "@internal In ARM/Thumb-2 state an address valid for loading/storing opaque structure diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md index 92eb004..e486404 100644 --- a/gcc/config/arm/predicates.md +++ b/gcc/config/arm/predicates.md @@ -764,3 +764,7 @@ (define_special_predicate "add_operator" (match_code "plus")) + +(define_predicate "mem_noofs_operand" + (and (match_code "mem") + (match_code "reg" "0"))) diff --git a/gcc/config/arm/sync.md b/gcc/config/arm/sync.md index 40ee93c..147625a 100644 --- a/gcc/config/arm/sync.md +++ b/gcc/config/arm/sync.md @@ -1,5 +1,5 @@ ;; Machine description for ARM processor synchronization primitives. -;; Copyright (C) 2010 Free Software Foundation, Inc. +;; Copyright (C) 2010, 2011 Free Software Foundation, Inc. ;; Written by Marcus Shawcroft (marcus.shawcroft@arm.com) ;; 64bit Atomics by Dave Gilbert (david.gilbert@linaro.org) ;; @@ -19,11 +19,20 @@ ;; along with GCC; see the file COPYING3. If not see ;; . */ -;; ARMV6 introduced ldrex and strex instruction. These instruction -;; access SI width data. In order to implement synchronization -;; primitives for the narrower QI and HI modes we insert appropriate -;; AND/OR sequences into the synchronization loop to mask out the -;; relevant component of an SI access. 
+(define_mode_attr sync_predtab + [(QI "TARGET_HAVE_LDREXBH && TARGET_HAVE_MEMORY_BARRIER") + (HI "TARGET_HAVE_LDREXBH && TARGET_HAVE_MEMORY_BARRIER") + (SI "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER") + (DI "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN + && TARGET_HAVE_MEMORY_BARRIER")]) + +(define_code_iterator syncop [plus minus ior xor and]) + +(define_code_attr sync_optab + [(ior "ior") (xor "xor") (and "and") (plus "add") (minus "sub")]) + +(define_mode_attr sync_sfx + [(QI "b") (HI "h") (SI "") (DI "d")]) (define_expand "memory_barrier" [(set (match_dup 0) @@ -34,463 +43,308 @@ MEM_VOLATILE_P (operands[0]) = 1; }) - -(define_mode_attr sync_predtab [(SI "TARGET_HAVE_LDREX && - TARGET_HAVE_MEMORY_BARRIER") - (QI "TARGET_HAVE_LDREXBH && - TARGET_HAVE_MEMORY_BARRIER") - (HI "TARGET_HAVE_LDREXBH && - TARGET_HAVE_MEMORY_BARRIER") - (DI "TARGET_HAVE_LDREXD && - ARM_DOUBLEWORD_ALIGN && - TARGET_HAVE_MEMORY_BARRIER")]) - -(define_expand "sync_compare_and_swap" - [(set (match_operand:QHSD 0 "s_register_operand") - (unspec_volatile:QHSD [(match_operand:QHSD 1 "memory_operand") - (match_operand:QHSD 2 "s_register_operand") - (match_operand:QHSD 3 "s_register_operand")] - VUNSPEC_SYNC_COMPARE_AND_SWAP))] - "" - { - struct arm_sync_generator generator; - generator.op = arm_sync_generator_omrn; - generator.u.omrn = gen_arm_sync_compare_and_swap; - arm_expand_sync (mode, &generator, operands[0], operands[1], - operands[2], operands[3]); - DONE; - }) - -(define_expand "sync_lock_test_and_set" - [(match_operand:QHSD 0 "s_register_operand") - (match_operand:QHSD 1 "memory_operand") - (match_operand:QHSD 2 "s_register_operand")] - "" +(define_insn "*memory_barrier" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))] + "TARGET_HAVE_MEMORY_BARRIER" { - struct arm_sync_generator generator; - generator.op = arm_sync_generator_omn; - generator.u.omn = gen_arm_sync_lock_test_and_set; - arm_expand_sync (mode, &generator, operands[0], operands[1], NULL, - operands[2]); - DONE; - }) - -(define_code_iterator syncop [plus minus ior xor and]) - -(define_code_attr sync_optab [(ior "ior") - (xor "xor") - (and "and") - (plus "add") - (minus "sub")]) + if (TARGET_HAVE_DMB) + { + /* Note we issue a system level barrier. We should consider issuing + a inner shareabilty zone barrier here instead, ie. "DMB ISH". */ + /* ??? Differentiate based on SEQ_CST vs less strict? 
*/ + return "dmb\tsy"; + } -(define_code_attr sync_clobber [(ior "=&r") - (and "=&r") - (xor "X") - (plus "X") - (minus "X")]) + if (TARGET_HAVE_DMB_MCR) + return "mcr\tp15, 0, r0, c7, c10, 5"; -(define_code_attr sync_t2_reqd [(ior "4") - (and "4") - (xor "*") - (plus "*") - (minus "*")]) - -(define_expand "sync_" - [(match_operand:QHSD 0 "memory_operand") - (match_operand:QHSD 1 "s_register_operand") - (syncop:QHSD (match_dup 0) (match_dup 1))] - "" - { - struct arm_sync_generator generator; - generator.op = arm_sync_generator_omn; - generator.u.omn = gen_arm_sync_new_; - arm_expand_sync (mode, &generator, NULL, operands[0], NULL, - operands[1]); - DONE; - }) + gcc_unreachable (); + } + [(set_attr "length" "4") + (set_attr "conds" "unconditional") + (set_attr "predicable" "no")]) -(define_expand "sync_nand" - [(match_operand:QHSD 0 "memory_operand") - (match_operand:QHSD 1 "s_register_operand") - (not:QHSD (and:QHSD (match_dup 0) (match_dup 1)))] +(define_expand "atomic_compare_and_swap" + [(match_operand:SI 0 "s_register_operand" "") ;; bool out + (match_operand:QHSD 1 "s_register_operand" "") ;; val out + (match_operand:QHSD 2 "mem_noofs_operand" "") ;; memory + (match_operand:QHSD 3 "general_operand" "") ;; expected + (match_operand:QHSD 4 "s_register_operand" "") ;; desired + (match_operand:SI 5 "const_int_operand") ;; is_weak + (match_operand:SI 6 "const_int_operand") ;; mod_s + (match_operand:SI 7 "const_int_operand")] ;; mod_f "" - { - struct arm_sync_generator generator; - generator.op = arm_sync_generator_omn; - generator.u.omn = gen_arm_sync_new_nand; - arm_expand_sync (mode, &generator, NULL, operands[0], NULL, - operands[1]); - DONE; - }) +{ + arm_expand_compare_and_swap (operands); + DONE; +}) -(define_expand "sync_new_" - [(match_operand:QHSD 0 "s_register_operand") - (match_operand:QHSD 1 "memory_operand") - (match_operand:QHSD 2 "s_register_operand") - (syncop:QHSD (match_dup 1) (match_dup 2))] +(define_insn_and_split "atomic_compare_and_swap_1" + [(set (match_operand:SI 0 "s_register_operand" "=&r") ;; bool out + (unspec_volatile:SI [(const_int 0)] VUNSPEC_ATOMIC_CAS)) + (set (match_operand:SI 1 "s_register_operand" "=&r") ;; val out + (zero_extend:SI + (match_operand:NARROW 2 "mem_noofs_operand" "+Ua"))) ;; memory + (set (match_dup 2) + (unspec_volatile:NARROW + [(match_operand:SI 3 "arm_add_operand" "rIL") ;; expected + (match_operand:NARROW 4 "s_register_operand" "r") ;; desired + (match_operand:SI 5 "const_int_operand") ;; is_weak + (match_operand:SI 6 "const_int_operand") ;; mod_s + (match_operand:SI 7 "const_int_operand")] ;; mod_f + VUNSPEC_ATOMIC_CAS)) + (clobber (reg:CC CC_REGNUM))] "" + "#" + "&& reload_completed" + [(const_int 0)] { - struct arm_sync_generator generator; - generator.op = arm_sync_generator_omn; - generator.u.omn = gen_arm_sync_new_; - arm_expand_sync (mode, &generator, operands[0], operands[1], - NULL, operands[2]); + arm_split_compare_and_swap (operands); DONE; }) -(define_expand "sync_new_nand" - [(match_operand:QHSD 0 "s_register_operand") - (match_operand:QHSD 1 "memory_operand") - (match_operand:QHSD 2 "s_register_operand") - (not:QHSD (and:QHSD (match_dup 1) (match_dup 2)))] - "" - { - struct arm_sync_generator generator; - generator.op = arm_sync_generator_omn; - generator.u.omn = gen_arm_sync_new_nand; - arm_expand_sync (mode, &generator, operands[0], operands[1], - NULL, operands[2]); - DONE; - }); +(define_mode_attr cas_cmp_operand + [(SI "arm_add_operand") (DI "cmpdi_operand")]) +(define_mode_attr cas_cmp_str + [(SI "rIL") (DI 
"rDi")]) -(define_expand "sync_old_" - [(match_operand:QHSD 0 "s_register_operand") - (match_operand:QHSD 1 "memory_operand") - (match_operand:QHSD 2 "s_register_operand") - (syncop:QHSD (match_dup 1) (match_dup 2))] +(define_insn_and_split "atomic_compare_and_swap_1" + [(set (match_operand:SI 0 "s_register_operand" "=&r") ;; bool out + (unspec_volatile:SI [(const_int 0)] VUNSPEC_ATOMIC_CAS)) + (set (match_operand:SIDI 1 "s_register_operand" "=&r") ;; val out + (match_operand:SIDI 2 "mem_noofs_operand" "+Ua")) ;; memory + (set (match_dup 2) + (unspec_volatile:SIDI + [(match_operand:SIDI 3 "" "") ;; expect + (match_operand:SIDI 4 "s_register_operand" "r") ;; desired + (match_operand:SI 5 "const_int_operand") ;; is_weak + (match_operand:SI 6 "const_int_operand") ;; mod_s + (match_operand:SI 7 "const_int_operand")] ;; mod_f + VUNSPEC_ATOMIC_CAS)) + (clobber (reg:CC CC_REGNUM))] "" + "#" + "&& reload_completed" + [(const_int 0)] { - struct arm_sync_generator generator; - generator.op = arm_sync_generator_omn; - generator.u.omn = gen_arm_sync_old_; - arm_expand_sync (mode, &generator, operands[0], operands[1], - NULL, operands[2]); + arm_split_compare_and_swap (operands); DONE; }) -(define_expand "sync_old_nand" - [(match_operand:QHSD 0 "s_register_operand") - (match_operand:QHSD 1 "memory_operand") - (match_operand:QHSD 2 "s_register_operand") - (not:QHSD (and:QHSD (match_dup 1) (match_dup 2)))] +(define_insn_and_split "atomic_exchange" + [(set (match_operand:QHSD 0 "s_register_operand" "=&r") ;; output + (match_operand:QHSD 1 "mem_noofs_operand" "+Ua")) ;; memory + (set (match_dup 1) + (unspec_volatile:QHSD + [(match_operand:QHSD 2 "s_register_operand" "r") ;; input + (match_operand:SI 3 "const_int_operand" "")] ;; model + VUNSPEC_ATOMIC_XCHG)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 4 "=&r"))] "" + "#" + "&& reload_completed" + [(const_int 0)] { - struct arm_sync_generator generator; - generator.op = arm_sync_generator_omn; - generator.u.omn = gen_arm_sync_old_nand; - arm_expand_sync (mode, &generator, operands[0], operands[1], - NULL, operands[2]); + arm_split_atomic_op (SET, operands[0], NULL, operands[1], + operands[2], operands[3], operands[4]); DONE; }) -(define_insn "arm_sync_compare_and_swap" - [(set (match_operand:SIDI 0 "s_register_operand" "=&r") - (unspec_volatile:SIDI - [(match_operand:SIDI 1 "arm_sync_memory_operand" "+Q") - (match_operand:SIDI 2 "s_register_operand" "r") - (match_operand:SIDI 3 "s_register_operand" "r")] - VUNSPEC_SYNC_COMPARE_AND_SWAP)) - (set (match_dup 1) (unspec_volatile:SIDI [(match_dup 2)] - VUNSPEC_SYNC_COMPARE_AND_SWAP)) - (set (reg:CC CC_REGNUM) (unspec_volatile:CC [(match_dup 1)] - VUNSPEC_SYNC_COMPARE_AND_SWAP)) - ] - "" - { - return arm_output_sync_insn (insn, operands); - } - [(set_attr "sync_result" "0") - (set_attr "sync_memory" "1") - (set_attr "sync_required_value" "2") - (set_attr "sync_new_value" "3") - (set_attr "sync_t1" "0") - (set_attr "conds" "clob") - (set_attr "predicable" "no")]) +(define_mode_attr atomic_op_operand + [(QI "reg_or_int_operand") + (HI "reg_or_int_operand") + (SI "reg_or_int_operand") + (DI "s_register_operand")]) -(define_insn "arm_sync_compare_and_swap" - [(set (match_operand:SI 0 "s_register_operand" "=&r") - (zero_extend:SI - (unspec_volatile:NARROW - [(match_operand:NARROW 1 "arm_sync_memory_operand" "+Q") - (match_operand:SI 2 "s_register_operand" "r") - (match_operand:SI 3 "s_register_operand" "r")] - VUNSPEC_SYNC_COMPARE_AND_SWAP))) - (set (match_dup 1) (unspec_volatile:NARROW [(match_dup 2)] 
- VUNSPEC_SYNC_COMPARE_AND_SWAP)) - (set (reg:CC CC_REGNUM) (unspec_volatile:CC [(match_dup 1)] - VUNSPEC_SYNC_COMPARE_AND_SWAP)) - ] - "" - { - return arm_output_sync_insn (insn, operands); - } - [(set_attr "sync_result" "0") - (set_attr "sync_memory" "1") - (set_attr "sync_required_value" "2") - (set_attr "sync_new_value" "3") - (set_attr "sync_t1" "0") - (set_attr "conds" "clob") - (set_attr "predicable" "no")]) +(define_mode_attr atomic_op_str + [(QI "rn") (HI "rn") (SI "rn") (DI "r")]) -(define_insn "arm_sync_lock_test_and_set" - [(set (match_operand:SIDI 0 "s_register_operand" "=&r") - (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q")) - (set (match_dup 1) - (unspec_volatile:SIDI [(match_operand:SIDI 2 "s_register_operand" "r")] - VUNSPEC_SYNC_LOCK)) +(define_insn_and_split "atomic_" + [(set (match_operand:QHSD 0 "mem_noofs_operand" "+Ua") + (unspec_volatile:QHSD + [(syncop:QHSD (match_dup 0) + (match_operand:QHSD 1 "" "")) + (match_operand:SI 2 "const_int_operand")] ;; model + VUNSPEC_ATOMIC_OP)) (clobber (reg:CC CC_REGNUM)) - (clobber (match_scratch:SI 3 "=&r"))] + (clobber (match_scratch:QHSD 3 "=&r")) + (clobber (match_scratch:SI 4 "=&r"))] "" + "#" + "&& reload_completed" + [(const_int 0)] { - return arm_output_sync_insn (insn, operands); - } - [(set_attr "sync_release_barrier" "no") - (set_attr "sync_result" "0") - (set_attr "sync_memory" "1") - (set_attr "sync_new_value" "2") - (set_attr "sync_t1" "0") - (set_attr "sync_t2" "3") - (set_attr "conds" "clob") - (set_attr "predicable" "no")]) + arm_split_atomic_op (, NULL, operands[3], operands[0], + operands[1], operands[2], operands[4]); + DONE; + }) -(define_insn "arm_sync_lock_test_and_set" - [(set (match_operand:SI 0 "s_register_operand" "=&r") - (zero_extend:SI (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))) - (set (match_dup 1) - (unspec_volatile:NARROW [(match_operand:SI 2 "s_register_operand" "r")] - VUNSPEC_SYNC_LOCK)) +(define_insn_and_split "atomic_nand" + [(set (match_operand:QHSD 0 "mem_noofs_operand" "+Ua") + (unspec_volatile:QHSD + [(not:QHSD + (and:QHSD (match_dup 0) + (match_operand:QHSD 1 "" ""))) + (match_operand:SI 2 "const_int_operand")] ;; model + VUNSPEC_ATOMIC_OP)) (clobber (reg:CC CC_REGNUM)) - (clobber (match_scratch:SI 3 "=&r"))] + (clobber (match_scratch:QHSD 3 "=&r")) + (clobber (match_scratch:SI 4 "=&r"))] "" + "#" + "&& reload_completed" + [(const_int 0)] { - return arm_output_sync_insn (insn, operands); - } - [(set_attr "sync_release_barrier" "no") - (set_attr "sync_result" "0") - (set_attr "sync_memory" "1") - (set_attr "sync_new_value" "2") - (set_attr "sync_t1" "0") - (set_attr "sync_t2" "3") - (set_attr "conds" "clob") - (set_attr "predicable" "no")]) + arm_split_atomic_op (NOT, NULL, operands[3], operands[0], + operands[1], operands[2], operands[4]); + DONE; + }) -(define_insn "arm_sync_new_" - [(set (match_operand:SIDI 0 "s_register_operand" "=&r") - (unspec_volatile:SIDI [(syncop:SIDI - (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q") - (match_operand:SIDI 2 "s_register_operand" "r")) - ] - VUNSPEC_SYNC_NEW_OP)) +(define_insn_and_split "atomic_fetch_" + [(set (match_operand:QHSD 0 "s_register_operand" "=&r") + (match_operand:QHSD 1 "mem_noofs_operand" "+Ua")) (set (match_dup 1) - (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)] - VUNSPEC_SYNC_NEW_OP)) + (unspec_volatile:QHSD + [(syncop:QHSD (match_dup 1) + (match_operand:QHSD 2 "" "")) + (match_operand:SI 3 "const_int_operand")] ;; model + VUNSPEC_ATOMIC_OP)) (clobber (reg:CC CC_REGNUM)) - (clobber (match_scratch:SI 3 
"=&r"))] + (clobber (match_scratch:QHSD 4 "=&r")) + (clobber (match_scratch:SI 5 "=&r"))] "" + "#" + "&& reload_completed" + [(const_int 0)] { - return arm_output_sync_insn (insn, operands); - } - [(set_attr "sync_result" "0") - (set_attr "sync_memory" "1") - (set_attr "sync_new_value" "2") - (set_attr "sync_t1" "0") - (set_attr "sync_t2" "3") - (set_attr "sync_op" "") - (set_attr "conds" "clob") - (set_attr "predicable" "no")]) + arm_split_atomic_op (, operands[0], operands[4], operands[1], + operands[2], operands[3], operands[5]); + DONE; + }) -(define_insn "arm_sync_new_" - [(set (match_operand:SI 0 "s_register_operand" "=&r") - (unspec_volatile:SI [(syncop:SI - (zero_extend:SI - (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) - (match_operand:SI 2 "s_register_operand" "r")) - ] - VUNSPEC_SYNC_NEW_OP)) +(define_insn_and_split "atomic_fetch_nand" + [(set (match_operand:QHSD 0 "s_register_operand" "=&r") + (match_operand:QHSD 1 "mem_noofs_operand" "+Ua")) (set (match_dup 1) - (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] - VUNSPEC_SYNC_NEW_OP)) + (unspec_volatile:QHSD + [(not:QHSD + (and:QHSD (match_dup 1) + (match_operand:QHSD 2 "" ""))) + (match_operand:SI 3 "const_int_operand")] ;; model + VUNSPEC_ATOMIC_OP)) (clobber (reg:CC CC_REGNUM)) - (clobber (match_scratch:SI 3 "=&r"))] + (clobber (match_scratch:QHSD 4 "=&r")) + (clobber (match_scratch:SI 5 "=&r"))] "" + "#" + "&& reload_completed" + [(const_int 0)] { - return arm_output_sync_insn (insn, operands); - } - [(set_attr "sync_result" "0") - (set_attr "sync_memory" "1") - (set_attr "sync_new_value" "2") - (set_attr "sync_t1" "0") - (set_attr "sync_t2" "3") - (set_attr "sync_op" "") - (set_attr "conds" "clob") - (set_attr "predicable" "no")]) + arm_split_atomic_op (NOT, operands[0], operands[4], operands[1], + operands[2], operands[3], operands[5]); + DONE; + }) -(define_insn "arm_sync_new_nand" - [(set (match_operand:SIDI 0 "s_register_operand" "=&r") - (unspec_volatile:SIDI [(not:SIDI (and:SIDI - (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q") - (match_operand:SIDI 2 "s_register_operand" "r"))) - ] - VUNSPEC_SYNC_NEW_OP)) +(define_insn_and_split "atomic__fetch" + [(set (match_operand:QHSD 0 "s_register_operand" "=&r") + (syncop:QHSD + (match_operand:QHSD 1 "mem_noofs_operand" "+Ua") + (match_operand:QHSD 2 "" ""))) (set (match_dup 1) - (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)] - VUNSPEC_SYNC_NEW_OP)) + (unspec_volatile:QHSD + [(match_dup 1) (match_dup 2) + (match_operand:SI 3 "const_int_operand")] ;; model + VUNSPEC_ATOMIC_OP)) (clobber (reg:CC CC_REGNUM)) - (clobber (match_scratch:SI 3 "=&r"))] + (clobber (match_scratch:SI 4 "=&r"))] "" + "#" + "&& reload_completed" + [(const_int 0)] { - return arm_output_sync_insn (insn, operands); - } - [(set_attr "sync_result" "0") - (set_attr "sync_memory" "1") - (set_attr "sync_new_value" "2") - (set_attr "sync_t1" "0") - (set_attr "sync_t2" "3") - (set_attr "sync_op" "nand") - (set_attr "conds" "clob") - (set_attr "predicable" "no")]) + arm_split_atomic_op (, NULL, operands[0], operands[1], + operands[2], operands[3], operands[4]); + DONE; + }) -(define_insn "arm_sync_new_nand" - [(set (match_operand:SI 0 "s_register_operand" "=&r") - (unspec_volatile:SI - [(not:SI - (and:SI - (zero_extend:SI - (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) - (match_operand:SI 2 "s_register_operand" "r"))) - ] VUNSPEC_SYNC_NEW_OP)) +(define_insn_and_split "atomic_nand_fetch" + [(set (match_operand:QHSD 0 "s_register_operand" "=&r") + (not:QHSD + (and:QHSD + 
(match_operand:QHSD 1 "mem_noofs_operand" "+Ua") + (match_operand:QHSD 2 "" "")))) (set (match_dup 1) - (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] - VUNSPEC_SYNC_NEW_OP)) + (unspec_volatile:QHSD + [(match_dup 1) (match_dup 2) + (match_operand:SI 3 "const_int_operand")] ;; model + VUNSPEC_ATOMIC_OP)) (clobber (reg:CC CC_REGNUM)) - (clobber (match_scratch:SI 3 "=&r"))] + (clobber (match_scratch:SI 4 "=&r"))] "" + "#" + "&& reload_completed" + [(const_int 0)] { - return arm_output_sync_insn (insn, operands); - } - [(set_attr "sync_result" "0") - (set_attr "sync_memory" "1") - (set_attr "sync_new_value" "2") - (set_attr "sync_t1" "0") - (set_attr "sync_t2" "3") - (set_attr "sync_op" "nand") - (set_attr "conds" "clob") - (set_attr "predicable" "no")]) + arm_split_atomic_op (NOT, NULL, operands[0], operands[1], + operands[2], operands[3], operands[4]); + DONE; + }) -(define_insn "arm_sync_old_" - [(set (match_operand:SIDI 0 "s_register_operand" "=&r") - (unspec_volatile:SIDI [(syncop:SIDI - (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q") - (match_operand:SIDI 2 "s_register_operand" "r")) - ] - VUNSPEC_SYNC_OLD_OP)) - (set (match_dup 1) - (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)] - VUNSPEC_SYNC_OLD_OP)) - (clobber (reg:CC CC_REGNUM)) - (clobber (match_scratch:SIDI 3 "=&r")) - (clobber (match_scratch:SI 4 ""))] - "" - { - return arm_output_sync_insn (insn, operands); - } - [(set_attr "sync_result" "0") - (set_attr "sync_memory" "1") - (set_attr "sync_new_value" "2") - (set_attr "sync_t1" "3") - (set_attr "sync_t2" "") - (set_attr "sync_op" "") - (set_attr "conds" "clob") - (set_attr "predicable" "no")]) +(define_insn "arm_load_exclusive" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (zero_extend:SI + (unspec_volatile:NARROW + [(match_operand:NARROW 1 "mem_noofs_operand" "Ua")] + VUNSPEC_LL)))] + "TARGET_HAVE_LDREXBH" + "ldrex\t%0, %C1") -(define_insn "arm_sync_old_" - [(set (match_operand:SI 0 "s_register_operand" "=&r") - (unspec_volatile:SI [(syncop:SI - (zero_extend:SI - (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) - (match_operand:SI 2 "s_register_operand" "r")) - ] - VUNSPEC_SYNC_OLD_OP)) - (set (match_dup 1) - (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] - VUNSPEC_SYNC_OLD_OP)) - (clobber (reg:CC CC_REGNUM)) - (clobber (match_scratch:SI 3 "=&r")) - (clobber (match_scratch:SI 4 ""))] - "" - { - return arm_output_sync_insn (insn, operands); - } - [(set_attr "sync_result" "0") - (set_attr "sync_memory" "1") - (set_attr "sync_new_value" "2") - (set_attr "sync_t1" "3") - (set_attr "sync_t2" "") - (set_attr "sync_op" "") - (set_attr "conds" "clob") - (set_attr "predicable" "no")]) +(define_insn "arm_load_exclusivesi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (unspec_volatile:SI + [(match_operand:SI 1 "mem_noofs_operand" "Ua")] + VUNSPEC_LL))] + "TARGET_HAVE_LDREX" + "ldrex\t%0, %C1") -(define_insn "arm_sync_old_nand" - [(set (match_operand:SIDI 0 "s_register_operand" "=&r") - (unspec_volatile:SIDI [(not:SIDI (and:SIDI - (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q") - (match_operand:SIDI 2 "s_register_operand" "r"))) - ] - VUNSPEC_SYNC_OLD_OP)) - (set (match_dup 1) - (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)] - VUNSPEC_SYNC_OLD_OP)) - (clobber (reg:CC CC_REGNUM)) - (clobber (match_scratch:SIDI 3 "=&r")) - (clobber (match_scratch:SI 4 "=&r"))] - "" +(define_insn "arm_load_exclusivedi" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (unspec_volatile:DI + [(match_operand:DI 1 "mem_noofs_operand" "Ua")] 
+ VUNSPEC_LL))] + "TARGET_HAVE_LDREXD" { - return arm_output_sync_insn (insn, operands); - } - [(set_attr "sync_result" "0") - (set_attr "sync_memory" "1") - (set_attr "sync_new_value" "2") - (set_attr "sync_t1" "3") - (set_attr "sync_t2" "4") - (set_attr "sync_op" "nand") - (set_attr "conds" "clob") - (set_attr "predicable" "no")]) + rtx target = operands[0]; + /* The restrictions on target registers in ARM mode are that the two + registers are consecutive and the first one is even; Thumb is + actually more flexible, but DI should give us this anyway. + Note that the 1st register always gets the lowest word in memory. */ + gcc_assert ((REGNO (target) & 1) == 0); + operands[2] = gen_rtx_REG (SImode, REGNO (target) + 1); + return "ldrexd\t%0, %2, %C1"; + }) -(define_insn "arm_sync_old_nand" +(define_insn "arm_store_exclusive" [(set (match_operand:SI 0 "s_register_operand" "=&r") - (unspec_volatile:SI [(not:SI (and:SI - (zero_extend:SI - (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) - (match_operand:SI 2 "s_register_operand" "r"))) - ] - VUNSPEC_SYNC_OLD_OP)) - (set (match_dup 1) - (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] - VUNSPEC_SYNC_OLD_OP)) - (clobber (reg:CC CC_REGNUM)) - (clobber (match_scratch:SI 3 "=&r")) - (clobber (match_scratch:SI 4 "=&r"))] + (unspec_volatile:SI [(const_int 0)] VUNSPEC_SC)) + (set (match_operand:QHSD 1 "mem_noofs_operand" "=Ua") + (unspec_volatile:QHSD + [(match_operand:QHSD 2 "s_register_operand" "r")] + VUNSPEC_SC))] "" { - return arm_output_sync_insn (insn, operands); - } - [(set_attr "sync_result" "0") - (set_attr "sync_memory" "1") - (set_attr "sync_new_value" "2") - (set_attr "sync_t1" "3") - (set_attr "sync_t2" "4") - (set_attr "sync_op" "nand") - (set_attr "conds" "clob") - (set_attr "predicable" "no")]) - -(define_insn "*memory_barrier" - [(set (match_operand:BLK 0 "" "") - (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))] - "TARGET_HAVE_MEMORY_BARRIER" - { - return arm_output_memory_barrier (operands); - } - [(set_attr "length" "4") - (set_attr "conds" "unconditional") - (set_attr "predicable" "no")]) - + if (mode == DImode) + { + rtx value = operands[2]; + /* The restrictions on target registers in ARM mode are that the two + registers are consecutive and the first one is even; Thumb is + actually more flexible, but DI should give us this anyway. + Note that the 1st register always gets the lowest word in memory. */ + gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2); + operands[3] = gen_rtx_REG (SImode, REGNO (value) + 1); + return "strexd%s\t%0, %2, %3, %C1"; + } + return "strex\t%0, %2, %C1"; + })
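
For reference, here is roughly what a split sequence looks like once
assembled.  The function name and register choices below are illustrative
only and are not taken from the patch; only the loop shape is the point.

/* Illustrative sketch, not part of the patch.  For a sequentially
   consistent exchange on an ARMv7 target, the atomic_exchange insn is
   split after reload into approximately:

       dmb     sy            @ arm_pre_atomic_barrier
   1:  ldrex   r3, [r0]      @ arm_emit_load_exclusive (old value)
       strex   r2, r1, [r0]  @ arm_emit_store_exclusive (new value)
       cmp     r2, #0
       bne     1b            @ retry until the store-exclusive succeeds
       dmb     sy            @ arm_post_atomic_barrier

   The exact registers depend on allocation.  */

#include <stdint.h>

int32_t
swap_in (int32_t *p, int32_t v)
{
  return __atomic_exchange_n (p, v, __ATOMIC_SEQ_CST);
}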