Message ID | 20231114215404.163508-2-xry111@xry111.site |
---|---|
State | New |
Headers | show |
Series | [v2] LoongArch: Remove redundant barrier instructions before LL-SC loops | expand |
在 2023/11/15 上午5:52, Xi Ruoyao 写道: > This is isomorphic to the LLVM changes [1-2]. > > On LoongArch, the LL and SC instructions has memory barrier semantics: > > - LL: <memory-barrier> + <load-exclusive> > - SC: <store-conditional> + <memory-barrier> > > But the compare and swap operation is allowed to fail, and if it fails > the SC instruction is not executed, thus the guarantee of acquiring > semantics cannot be ensured. Therefore, an acquire barrier needs to be > generated when failure_memorder includes an acquire operation. > > On CPUs implementing LoongArch v1.10 or later, "dbar 0b10100" is an > acquire barrier; on CPUs implementing LoongArch v1.00, it is a full > barrier. So it's always enough for acquire semantics. OTOH if an > acquire semantic is not needed, we still needs the "dbar 0x700" as the > load-load barrier like all LL-SC loops. > > [1]:https://github.com/llvm/llvm-project/pull/67391 > [2]:https://github.com/llvm/llvm-project/pull/69339 I've done the test. There's no problem. Thanks. > gcc/ChangeLog: > > * config/loongarch/loongarch.cc > (loongarch_memmodel_needs_release_fence): Remove. > (loongarch_cas_failure_memorder_needs_acquire): New static > function. > (loongarch_print_operand): Redefine 'G' for the barrier on CAS > failure. > * config/loongarch/sync.md (atomic_cas_value_strong<mode>): > Remove the redundant barrier before the LL instruction, and > emit an acquire barrier on failure if needed by > failure_memorder. > (atomic_cas_value_cmp_and_7_<mode>): Likewise. > (atomic_cas_value_add_7_<mode>): Remove the unnecessary barrier > before the LL instruction. > (atomic_cas_value_sub_7_<mode>): Likewise. > (atomic_cas_value_and_7_<mode>): Likewise. > (atomic_cas_value_xor_7_<mode>): Likewise. > (atomic_cas_value_or_7_<mode>): Likewise. > (atomic_cas_value_nand_7_<mode>): Likewise. > (atomic_cas_value_exchange_7_<mode>): Likewise. > > gcc/testsuite/ChangeLog: > > * gcc.target/loongarch/cas-acquire.c: New test. > --- > > v1: https://gcc.gnu.org/pipermail/gcc-patches/2023-November/635304.html > > Changes from v1: > > - Remove yet not reachable "case MEMMODEL_CONSUME". > - Fix the test case so it almost always fail with (buggy) GCC 13 and > LA664. > > Bootstrapped and regtested on loongarch64-linux-gnu running on LA664. > Ok for trunk? > > gcc/config/loongarch/loongarch.cc | 30 ++++--- > gcc/config/loongarch/sync.md | 49 +++++------ > .../gcc.target/loongarch/cas-acquire.c | 82 +++++++++++++++++++ > 3 files changed, 119 insertions(+), 42 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/loongarch/cas-acquire.c > > diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc > index 2998bf740d4..738911661d7 100644 > --- a/gcc/config/loongarch/loongarch.cc > +++ b/gcc/config/loongarch/loongarch.cc > @@ -5833,27 +5833,27 @@ loongarch_memmodel_needs_rel_acq_fence (enum memmodel model) > } > } > > -/* Return true if a FENCE should be emitted to before a memory access to > - implement the release portion of memory model MODEL. */ > +/* Return true if a FENCE should be emitted after a failed CAS to > + implement the acquire semantic of failure_memorder. */ > > static bool > -loongarch_memmodel_needs_release_fence (enum memmodel model) > +loongarch_cas_failure_memorder_needs_acquire (enum memmodel model) > { > - switch (model) > + switch (memmodel_base (model)) > { > + case MEMMODEL_ACQUIRE: > case MEMMODEL_ACQ_REL: > case MEMMODEL_SEQ_CST: > - case MEMMODEL_SYNC_SEQ_CST: > - case MEMMODEL_RELEASE: > - case MEMMODEL_SYNC_RELEASE: > return true; > > - case MEMMODEL_ACQUIRE: > - case MEMMODEL_CONSUME: > - case MEMMODEL_SYNC_ACQUIRE: > case MEMMODEL_RELAXED: > + case MEMMODEL_RELEASE: > return false; > > + /* MEMMODEL_CONSUME is deliberately not handled because it's always > + replaced by MEMMODEL_ACQUIRE as at now. If you see an ICE caused by > + MEMMODEL_CONSUME, read the change (re)introducing it carefully and > + decide what to do. See PR 59448 and get_memmodel in builtins.cc. */ > default: > gcc_unreachable (); > } > @@ -5966,7 +5966,8 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part, > 'd' Print CONST_INT OP in decimal. > 'E' Print CONST_INT OP element 0 of a replicated CONST_VECTOR in decimal. > 'F' Print the FPU branch condition for comparison OP. > - 'G' Print a DBAR insn if the memory model requires a release. > + 'G' Print a DBAR insn for CAS failure (with an acquire semantic if > + needed, otherwise a simple load-load barrier). > 'H' Print address 52-61bit relocation associated with OP. > 'h' Print the high-part relocation associated with OP. > 'i' Print i if the operand is not a register. > @@ -6057,8 +6058,11 @@ loongarch_print_operand (FILE *file, rtx op, int letter) > break; > > case 'G': > - if (loongarch_memmodel_needs_release_fence ((enum memmodel) INTVAL (op))) > - fputs ("dbar\t0", file); > + if (loongarch_cas_failure_memorder_needs_acquire ( > + memmodel_from_int (INTVAL (op)))) > + fputs ("dbar\t0b10100", file); > + else > + fputs ("dbar\t0x700", file); > break; > > case 'h': > diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md > index 1ad0c63e0d9..c112091a60f 100644 > --- a/gcc/config/loongarch/sync.md > +++ b/gcc/config/loongarch/sync.md > @@ -162,19 +162,18 @@ (define_insn "atomic_cas_value_strong<mode>" > (clobber (match_scratch:GPR 6 "=&r"))] > "" > { > - return "%G5\\n\\t" > - "1:\\n\\t" > + return "1:\\n\\t" > "ll.<amo>\\t%0,%1\\n\\t" > "bne\\t%0,%z2,2f\\n\\t" > "or%i3\\t%6,$zero,%3\\n\\t" > "sc.<amo>\\t%6,%1\\n\\t" > - "beq\\t$zero,%6,1b\\n\\t" > + "beqz\\t%6,1b\\n\\t" > "b\\t3f\\n\\t" > "2:\\n\\t" > - "dbar\\t0x700\\n\\t" > + "%G5\\n\\t" > "3:\\n\\t"; > } > - [(set (attr "length") (const_int 32))]) > + [(set (attr "length") (const_int 28))]) > > (define_expand "atomic_compare_and_swap<mode>" > [(match_operand:SI 0 "register_operand" "") ;; bool output > @@ -267,8 +266,7 @@ (define_insn "atomic_cas_value_cmp_and_7_<mode>" > (clobber (match_scratch:GPR 7 "=&r"))] > "" > { > - return "%G6\\n\\t" > - "1:\\n\\t" > + return "1:\\n\\t" > "ll.<amo>\\t%0,%1\\n\\t" > "and\\t%7,%0,%2\\n\\t" > "bne\\t%7,%z4,2f\\n\\t" > @@ -278,10 +276,10 @@ (define_insn "atomic_cas_value_cmp_and_7_<mode>" > "beq\\t$zero,%7,1b\\n\\t" > "b\\t3f\\n\\t" > "2:\\n\\t" > - "dbar\\t0x700\\n\\t" > + "%G6\\n\\t" > "3:\\n\\t"; > } > - [(set (attr "length") (const_int 40))]) > + [(set (attr "length") (const_int 36))]) > > (define_expand "atomic_compare_and_swap<mode>" > [(match_operand:SI 0 "register_operand" "") ;; bool output > @@ -336,8 +334,7 @@ (define_insn "atomic_cas_value_add_7_<mode>" > (clobber (match_scratch:GPR 8 "=&r"))] > "" > { > - return "%G6\\n\\t" > - "1:\\n\\t" > + return "1:\\n\\t" > "ll.<amo>\\t%0,%1\\n\\t" > "and\\t%7,%0,%3\\n\\t" > "add.w\\t%8,%0,%z5\\n\\t" > @@ -347,7 +344,7 @@ (define_insn "atomic_cas_value_add_7_<mode>" > "beq\\t$zero,%7,1b"; > } > > - [(set (attr "length") (const_int 32))]) > + [(set (attr "length") (const_int 28))]) > > (define_insn "atomic_cas_value_sub_7_<mode>" > [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res > @@ -363,8 +360,7 @@ (define_insn "atomic_cas_value_sub_7_<mode>" > (clobber (match_scratch:GPR 8 "=&r"))] > "" > { > - return "%G6\\n\\t" > - "1:\\n\\t" > + return "1:\\n\\t" > "ll.<amo>\\t%0,%1\\n\\t" > "and\\t%7,%0,%3\\n\\t" > "sub.w\\t%8,%0,%z5\\n\\t" > @@ -373,7 +369,7 @@ (define_insn "atomic_cas_value_sub_7_<mode>" > "sc.<amo>\\t%7,%1\\n\\t" > "beq\\t$zero,%7,1b"; > } > - [(set (attr "length") (const_int 32))]) > + [(set (attr "length") (const_int 28))]) > > (define_insn "atomic_cas_value_and_7_<mode>" > [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res > @@ -389,8 +385,7 @@ (define_insn "atomic_cas_value_and_7_<mode>" > (clobber (match_scratch:GPR 8 "=&r"))] > "" > { > - return "%G6\\n\\t" > - "1:\\n\\t" > + return "1:\\n\\t" > "ll.<amo>\\t%0,%1\\n\\t" > "and\\t%7,%0,%3\\n\\t" > "and\\t%8,%0,%z5\\n\\t" > @@ -399,7 +394,7 @@ (define_insn "atomic_cas_value_and_7_<mode>" > "sc.<amo>\\t%7,%1\\n\\t" > "beq\\t$zero,%7,1b"; > } > - [(set (attr "length") (const_int 32))]) > + [(set (attr "length") (const_int 28))]) > > (define_insn "atomic_cas_value_xor_7_<mode>" > [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res > @@ -415,8 +410,7 @@ (define_insn "atomic_cas_value_xor_7_<mode>" > (clobber (match_scratch:GPR 8 "=&r"))] > "" > { > - return "%G6\\n\\t" > - "1:\\n\\t" > + return "1:\\n\\t" > "ll.<amo>\\t%0,%1\\n\\t" > "and\\t%7,%0,%3\\n\\t" > "xor\\t%8,%0,%z5\\n\\t" > @@ -426,7 +420,7 @@ (define_insn "atomic_cas_value_xor_7_<mode>" > "beq\\t$zero,%7,1b"; > } > > - [(set (attr "length") (const_int 32))]) > + [(set (attr "length") (const_int 28))]) > > (define_insn "atomic_cas_value_or_7_<mode>" > [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res > @@ -442,8 +436,7 @@ (define_insn "atomic_cas_value_or_7_<mode>" > (clobber (match_scratch:GPR 8 "=&r"))] > "" > { > - return "%G6\\n\\t" > - "1:\\n\\t" > + return "1:\\n\\t" > "ll.<amo>\\t%0,%1\\n\\t" > "and\\t%7,%0,%3\\n\\t" > "or\\t%8,%0,%z5\\n\\t" > @@ -453,7 +446,7 @@ (define_insn "atomic_cas_value_or_7_<mode>" > "beq\\t$zero,%7,1b"; > } > > - [(set (attr "length") (const_int 32))]) > + [(set (attr "length") (const_int 28))]) > > (define_insn "atomic_cas_value_nand_7_<mode>" > [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res > @@ -469,8 +462,7 @@ (define_insn "atomic_cas_value_nand_7_<mode>" > (clobber (match_scratch:GPR 8 "=&r"))] > "" > { > - return "%G6\\n\\t" > - "1:\\n\\t" > + return "1:\\n\\t" > "ll.<amo>\\t%0,%1\\n\\t" > "and\\t%7,%0,%3\\n\\t" > "and\\t%8,%0,%z5\\n\\t" > @@ -479,7 +471,7 @@ (define_insn "atomic_cas_value_nand_7_<mode>" > "sc.<amo>\\t%7,%1\\n\\t" > "beq\\t$zero,%7,1b"; > } > - [(set (attr "length") (const_int 32))]) > + [(set (attr "length") (const_int 28))]) > > (define_insn "atomic_cas_value_exchange_7_<mode>" > [(set (match_operand:GPR 0 "register_operand" "=&r") > @@ -494,8 +486,7 @@ (define_insn "atomic_cas_value_exchange_7_<mode>" > (clobber (match_scratch:GPR 7 "=&r"))] > "" > { > - return "%G6\\n\\t" > - "1:\\n\\t" > + return "1:\\n\\t" > "ll.<amo>\\t%0,%1\\n\\t" > "and\\t%7,%0,%z3\\n\\t" > "or%i5\\t%7,%7,%5\\n\\t" > diff --git a/gcc/testsuite/gcc.target/loongarch/cas-acquire.c b/gcc/testsuite/gcc.target/loongarch/cas-acquire.c > new file mode 100644 > index 00000000000..ff7ba866f32 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/loongarch/cas-acquire.c > @@ -0,0 +1,82 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target c99_runtime } */ > +/* { dg-require-effective-target pthread } */ > +/* { dg-options "-std=c99 -pthread" } */ > + > +/* https://github.com/llvm/llvm-project/pull/67391#issuecomment-1752403934 > + reported that this had failed with GCC and 3A6000. */ > + > +#include <pthread.h> > +#include <stdatomic.h> > +#include <stdbool.h> > +#include <stdio.h> > + > +static unsigned int tags[32]; > +static unsigned int vals[32]; > + > +static void * > +writer_entry (void *data) > +{ > + atomic_uint *pt = (atomic_uint *)tags; > + atomic_uint *pv = (atomic_uint *)vals; > + > + for (unsigned int n = 1; n < 10000; n++) > + { > + atomic_store_explicit (&pv[n & 31], n, memory_order_release); > + atomic_store_explicit (&pt[n & 31], n, memory_order_release); > + } > + > + return NULL; > +} > + > +static void * > +reader_entry (void *data) > +{ > + atomic_uint *pt = (atomic_uint *)tags; > + atomic_uint *pv = (atomic_uint *)vals; > + int i; > + > + for (;;) > + { > + for (i = 0; i < 32; i++) > + { > + unsigned int tag = 0; > + bool res; > + > + res = atomic_compare_exchange_weak_explicit ( > + &pt[i], &tag, 0, memory_order_acquire, memory_order_acquire); > + if (!res) > + { > + unsigned int val; > + > + val = atomic_load_explicit (&pv[i], memory_order_relaxed); > + if (val < tag) > + __builtin_trap (); > + } > + } > + } > + > + return NULL; > +} > + > +int > +main (int argc, char *argv[]) > +{ > + pthread_t writer; > + pthread_t reader; > + int res; > + > + res = pthread_create (&writer, NULL, writer_entry, NULL); > + if (res < 0) > + __builtin_trap (); > + > + res = pthread_create (&reader, NULL, reader_entry, NULL); > + if (res < 0) > + __builtin_trap (); > + > + res = pthread_join (writer, NULL); > + if (res < 0) > + __builtin_trap (); > + > + return 0; > +}
Pushed r14-5486.
/* snip */
> > * gcc.target/loongarch/cas-acquire.c: New test.
This test fails with GCC 12/13 on LA664, and it indicates a correctness
issue. May I backport this patch to 12/13 as well?
在 2023/11/15 下午7:38, Xi Ruoyao 写道: > Pushed r14-5486. > > /* snip */ > >>> * gcc.target/loongarch/cas-acquire.c: New test. > This test fails with GCC 12/13 on LA664, and it indicates a correctness > issue. May I backport this patch to 12/13 as well? > I think we can backport. Thanks!
On Thu, 2023-11-16 at 09:18 +0800, chenglulu wrote: > > 在 2023/11/15 下午7:38, Xi Ruoyao 写道: > > Pushed r14-5486. > > > > /* snip */ > > > > > > * gcc.target/loongarch/cas-acquire.c: New test. > > This test fails with GCC 12/13 on LA664, and it indicates a > > correctness > > issue. May I backport this patch to 12/13 as well? > > > I think we can backport. Pushed r12-9984 and r13-8074.
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 2998bf740d4..738911661d7 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -5833,27 +5833,27 @@ loongarch_memmodel_needs_rel_acq_fence (enum memmodel model) } } -/* Return true if a FENCE should be emitted to before a memory access to - implement the release portion of memory model MODEL. */ +/* Return true if a FENCE should be emitted after a failed CAS to + implement the acquire semantic of failure_memorder. */ static bool -loongarch_memmodel_needs_release_fence (enum memmodel model) +loongarch_cas_failure_memorder_needs_acquire (enum memmodel model) { - switch (model) + switch (memmodel_base (model)) { + case MEMMODEL_ACQUIRE: case MEMMODEL_ACQ_REL: case MEMMODEL_SEQ_CST: - case MEMMODEL_SYNC_SEQ_CST: - case MEMMODEL_RELEASE: - case MEMMODEL_SYNC_RELEASE: return true; - case MEMMODEL_ACQUIRE: - case MEMMODEL_CONSUME: - case MEMMODEL_SYNC_ACQUIRE: case MEMMODEL_RELAXED: + case MEMMODEL_RELEASE: return false; + /* MEMMODEL_CONSUME is deliberately not handled because it's always + replaced by MEMMODEL_ACQUIRE as at now. If you see an ICE caused by + MEMMODEL_CONSUME, read the change (re)introducing it carefully and + decide what to do. See PR 59448 and get_memmodel in builtins.cc. */ default: gcc_unreachable (); } @@ -5966,7 +5966,8 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part, 'd' Print CONST_INT OP in decimal. 'E' Print CONST_INT OP element 0 of a replicated CONST_VECTOR in decimal. 'F' Print the FPU branch condition for comparison OP. - 'G' Print a DBAR insn if the memory model requires a release. + 'G' Print a DBAR insn for CAS failure (with an acquire semantic if + needed, otherwise a simple load-load barrier). 'H' Print address 52-61bit relocation associated with OP. 'h' Print the high-part relocation associated with OP. 'i' Print i if the operand is not a register. @@ -6057,8 +6058,11 @@ loongarch_print_operand (FILE *file, rtx op, int letter) break; case 'G': - if (loongarch_memmodel_needs_release_fence ((enum memmodel) INTVAL (op))) - fputs ("dbar\t0", file); + if (loongarch_cas_failure_memorder_needs_acquire ( + memmodel_from_int (INTVAL (op)))) + fputs ("dbar\t0b10100", file); + else + fputs ("dbar\t0x700", file); break; case 'h': diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md index 1ad0c63e0d9..c112091a60f 100644 --- a/gcc/config/loongarch/sync.md +++ b/gcc/config/loongarch/sync.md @@ -162,19 +162,18 @@ (define_insn "atomic_cas_value_strong<mode>" (clobber (match_scratch:GPR 6 "=&r"))] "" { - return "%G5\\n\\t" - "1:\\n\\t" + return "1:\\n\\t" "ll.<amo>\\t%0,%1\\n\\t" "bne\\t%0,%z2,2f\\n\\t" "or%i3\\t%6,$zero,%3\\n\\t" "sc.<amo>\\t%6,%1\\n\\t" - "beq\\t$zero,%6,1b\\n\\t" + "beqz\\t%6,1b\\n\\t" "b\\t3f\\n\\t" "2:\\n\\t" - "dbar\\t0x700\\n\\t" + "%G5\\n\\t" "3:\\n\\t"; } - [(set (attr "length") (const_int 32))]) + [(set (attr "length") (const_int 28))]) (define_expand "atomic_compare_and_swap<mode>" [(match_operand:SI 0 "register_operand" "") ;; bool output @@ -267,8 +266,7 @@ (define_insn "atomic_cas_value_cmp_and_7_<mode>" (clobber (match_scratch:GPR 7 "=&r"))] "" { - return "%G6\\n\\t" - "1:\\n\\t" + return "1:\\n\\t" "ll.<amo>\\t%0,%1\\n\\t" "and\\t%7,%0,%2\\n\\t" "bne\\t%7,%z4,2f\\n\\t" @@ -278,10 +276,10 @@ (define_insn "atomic_cas_value_cmp_and_7_<mode>" "beq\\t$zero,%7,1b\\n\\t" "b\\t3f\\n\\t" "2:\\n\\t" - "dbar\\t0x700\\n\\t" + "%G6\\n\\t" "3:\\n\\t"; } - [(set (attr "length") (const_int 40))]) + [(set (attr "length") (const_int 36))]) (define_expand "atomic_compare_and_swap<mode>" [(match_operand:SI 0 "register_operand" "") ;; bool output @@ -336,8 +334,7 @@ (define_insn "atomic_cas_value_add_7_<mode>" (clobber (match_scratch:GPR 8 "=&r"))] "" { - return "%G6\\n\\t" - "1:\\n\\t" + return "1:\\n\\t" "ll.<amo>\\t%0,%1\\n\\t" "and\\t%7,%0,%3\\n\\t" "add.w\\t%8,%0,%z5\\n\\t" @@ -347,7 +344,7 @@ (define_insn "atomic_cas_value_add_7_<mode>" "beq\\t$zero,%7,1b"; } - [(set (attr "length") (const_int 32))]) + [(set (attr "length") (const_int 28))]) (define_insn "atomic_cas_value_sub_7_<mode>" [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res @@ -363,8 +360,7 @@ (define_insn "atomic_cas_value_sub_7_<mode>" (clobber (match_scratch:GPR 8 "=&r"))] "" { - return "%G6\\n\\t" - "1:\\n\\t" + return "1:\\n\\t" "ll.<amo>\\t%0,%1\\n\\t" "and\\t%7,%0,%3\\n\\t" "sub.w\\t%8,%0,%z5\\n\\t" @@ -373,7 +369,7 @@ (define_insn "atomic_cas_value_sub_7_<mode>" "sc.<amo>\\t%7,%1\\n\\t" "beq\\t$zero,%7,1b"; } - [(set (attr "length") (const_int 32))]) + [(set (attr "length") (const_int 28))]) (define_insn "atomic_cas_value_and_7_<mode>" [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res @@ -389,8 +385,7 @@ (define_insn "atomic_cas_value_and_7_<mode>" (clobber (match_scratch:GPR 8 "=&r"))] "" { - return "%G6\\n\\t" - "1:\\n\\t" + return "1:\\n\\t" "ll.<amo>\\t%0,%1\\n\\t" "and\\t%7,%0,%3\\n\\t" "and\\t%8,%0,%z5\\n\\t" @@ -399,7 +394,7 @@ (define_insn "atomic_cas_value_and_7_<mode>" "sc.<amo>\\t%7,%1\\n\\t" "beq\\t$zero,%7,1b"; } - [(set (attr "length") (const_int 32))]) + [(set (attr "length") (const_int 28))]) (define_insn "atomic_cas_value_xor_7_<mode>" [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res @@ -415,8 +410,7 @@ (define_insn "atomic_cas_value_xor_7_<mode>" (clobber (match_scratch:GPR 8 "=&r"))] "" { - return "%G6\\n\\t" - "1:\\n\\t" + return "1:\\n\\t" "ll.<amo>\\t%0,%1\\n\\t" "and\\t%7,%0,%3\\n\\t" "xor\\t%8,%0,%z5\\n\\t" @@ -426,7 +420,7 @@ (define_insn "atomic_cas_value_xor_7_<mode>" "beq\\t$zero,%7,1b"; } - [(set (attr "length") (const_int 32))]) + [(set (attr "length") (const_int 28))]) (define_insn "atomic_cas_value_or_7_<mode>" [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res @@ -442,8 +436,7 @@ (define_insn "atomic_cas_value_or_7_<mode>" (clobber (match_scratch:GPR 8 "=&r"))] "" { - return "%G6\\n\\t" - "1:\\n\\t" + return "1:\\n\\t" "ll.<amo>\\t%0,%1\\n\\t" "and\\t%7,%0,%3\\n\\t" "or\\t%8,%0,%z5\\n\\t" @@ -453,7 +446,7 @@ (define_insn "atomic_cas_value_or_7_<mode>" "beq\\t$zero,%7,1b"; } - [(set (attr "length") (const_int 32))]) + [(set (attr "length") (const_int 28))]) (define_insn "atomic_cas_value_nand_7_<mode>" [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res @@ -469,8 +462,7 @@ (define_insn "atomic_cas_value_nand_7_<mode>" (clobber (match_scratch:GPR 8 "=&r"))] "" { - return "%G6\\n\\t" - "1:\\n\\t" + return "1:\\n\\t" "ll.<amo>\\t%0,%1\\n\\t" "and\\t%7,%0,%3\\n\\t" "and\\t%8,%0,%z5\\n\\t" @@ -479,7 +471,7 @@ (define_insn "atomic_cas_value_nand_7_<mode>" "sc.<amo>\\t%7,%1\\n\\t" "beq\\t$zero,%7,1b"; } - [(set (attr "length") (const_int 32))]) + [(set (attr "length") (const_int 28))]) (define_insn "atomic_cas_value_exchange_7_<mode>" [(set (match_operand:GPR 0 "register_operand" "=&r") @@ -494,8 +486,7 @@ (define_insn "atomic_cas_value_exchange_7_<mode>" (clobber (match_scratch:GPR 7 "=&r"))] "" { - return "%G6\\n\\t" - "1:\\n\\t" + return "1:\\n\\t" "ll.<amo>\\t%0,%1\\n\\t" "and\\t%7,%0,%z3\\n\\t" "or%i5\\t%7,%7,%5\\n\\t" diff --git a/gcc/testsuite/gcc.target/loongarch/cas-acquire.c b/gcc/testsuite/gcc.target/loongarch/cas-acquire.c new file mode 100644 index 00000000000..ff7ba866f32 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/cas-acquire.c @@ -0,0 +1,82 @@ +/* { dg-do run } */ +/* { dg-require-effective-target c99_runtime } */ +/* { dg-require-effective-target pthread } */ +/* { dg-options "-std=c99 -pthread" } */ + +/* https://github.com/llvm/llvm-project/pull/67391#issuecomment-1752403934 + reported that this had failed with GCC and 3A6000. */ + +#include <pthread.h> +#include <stdatomic.h> +#include <stdbool.h> +#include <stdio.h> + +static unsigned int tags[32]; +static unsigned int vals[32]; + +static void * +writer_entry (void *data) +{ + atomic_uint *pt = (atomic_uint *)tags; + atomic_uint *pv = (atomic_uint *)vals; + + for (unsigned int n = 1; n < 10000; n++) + { + atomic_store_explicit (&pv[n & 31], n, memory_order_release); + atomic_store_explicit (&pt[n & 31], n, memory_order_release); + } + + return NULL; +} + +static void * +reader_entry (void *data) +{ + atomic_uint *pt = (atomic_uint *)tags; + atomic_uint *pv = (atomic_uint *)vals; + int i; + + for (;;) + { + for (i = 0; i < 32; i++) + { + unsigned int tag = 0; + bool res; + + res = atomic_compare_exchange_weak_explicit ( + &pt[i], &tag, 0, memory_order_acquire, memory_order_acquire); + if (!res) + { + unsigned int val; + + val = atomic_load_explicit (&pv[i], memory_order_relaxed); + if (val < tag) + __builtin_trap (); + } + } + } + + return NULL; +} + +int +main (int argc, char *argv[]) +{ + pthread_t writer; + pthread_t reader; + int res; + + res = pthread_create (&writer, NULL, writer_entry, NULL); + if (res < 0) + __builtin_trap (); + + res = pthread_create (&reader, NULL, reader_entry, NULL); + if (res < 0) + __builtin_trap (); + + res = pthread_join (writer, NULL); + if (res < 0) + __builtin_trap (); + + return 0; +}