Message ID | b7767fca-a9a6-4d69-8e6d-a01a13d9e327@gmail.com |
---|---|
State | New |
Headers | show |
Series | [v2] RISC-V: Introduce -mrvv-allow-misalign. | expand |
On Fri, 24 May 2024 09:19:09 PDT (-0700), Robin Dapp wrote: >> We should have something in doc/invoke too, this one is going to be >> tricky for users. We'll also have to define how this interacts with >> the existing -mstrict-align. > > Addressed the rest in the attached v2 which also fixes tests. > I'm really not sure about -mstrict-align. I would have hoped that using > -mstrict-align we'd never run into any movmisalign situation but that > might be wishful thinking. Do we need to specify an > interaction, though? For now the new options disables movmisalign so > if we hit that despite -mstrict-align we'd still not vectorize it. I think we just need to write it down. I think there's two ways to encode this: either we treat scalar and vector as independent, or we couple them. If we treat them independently then we end up with four cases, it's not clear if they're all interesting. IIUC with this patch we'd be able to encode * -mstrict-align: Both scalar and vector misaligned accesses are unsupported (-mrvv-allow-misalign doesn't matter). I'm not sure if there's hardware there, but given we have systems that don't support scalar misaligned accesses it seems reasonable to assume they'll also not support vector misaligned accesses. * -mno-strict-align -mno-rvv-allow-misalign: Scalar misaligned are supported, vector misaligned aren't supported. This matches our best theory of how the k230 and k1 behave, so it also seems reasonable to support. * -mno-strict-align -mrvv-allow-misalign: Both scalar and vector misaligned accesses are supported. This seems reasonable to support as it's how I'd hope big cores end up being designed, though again there's no hardware. The fourth case is kind of wacky: scalar misaligned is unsupported, vector misaligned is supported. I'm not really sure why we'd end up with a system like that, but HW vendors do wacky things so it's kind of hard to predict. 
IMO if we're defining that as an unencodeable case it's fine, we can always add something later. We should just write it down so nobody's confused. > Regtested on rv64gcv_zvfh_zvbb. > > Regards > Robin > > This patch changes the default from always enabling movmisalign to > not enabling it. It adds an option to override the default and adds > generic-ooo to the uarchs that support misaligned vector access. > > It also adds a check_effective_target_riscv_v_misalign_ok to the > testsuite which enables or disables the vector misalignment tests > depending on whether the target under test can execute a misaligned > vle32. > > gcc/ChangeLog: > > * config/riscv/riscv-opts.h (TARGET_VECTOR_MISALIGN_SUPPORTED): > Move from here... > * config/riscv/riscv.h (TARGET_VECTOR_MISALIGN_SUPPORTED): > ...to here and make dependent on uarch and rvv_allow_misalign. > * config/riscv/riscv.opt: Add -mrvv-allow-misalign. > > gcc/testsuite/ChangeLog: > > * lib/target-supports.exp: Add > check_effective_target_riscv_v_misalign_ok. > * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c: Add > -mrvv-allow-misalign. > * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c: Ditto. > * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c: Ditto. > * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c: Ditto. > * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c: Ditto. > * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c: Ditto. 
> * gcc.target/riscv/rvv/autovec/vls/misalign-1.c: > --- > gcc/config/riscv/riscv-opts.h | 3 -- > gcc/config/riscv/riscv.cc | 18 ++++++++++ > gcc/config/riscv/riscv.h | 6 ++++ > gcc/config/riscv/riscv.opt | 5 +++ > gcc/doc/invoke.texi | 5 +++ > .../costmodel/riscv/rvv/dynamic-lmul2-7.c | 2 +- > .../vect/costmodel/riscv/rvv/vla_vs_vls-10.c | 2 +- > .../vect/costmodel/riscv/rvv/vla_vs_vls-11.c | 2 +- > .../vect/costmodel/riscv/rvv/vla_vs_vls-12.c | 2 +- > .../vect/costmodel/riscv/rvv/vla_vs_vls-8.c | 2 +- > .../vect/costmodel/riscv/rvv/vla_vs_vls-9.c | 2 +- > .../riscv/rvv/autovec/vls/misalign-1.c | 2 +- > gcc/testsuite/lib/target-supports.exp | 34 +++++++++++++++++-- > 13 files changed, 73 insertions(+), 12 deletions(-) > > diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h > index 1b2dd5757a8..f58a07abffc 100644 > --- a/gcc/config/riscv/riscv-opts.h > +++ b/gcc/config/riscv/riscv-opts.h > @@ -147,9 +147,6 @@ enum rvv_vector_bits_enum { > ? 0 \ > : 32 << (__builtin_popcount (opts->x_riscv_zvl_flags) - 1)) > > -/* TODO: Enable RVV movmisalign by default for now. */ > -#define TARGET_VECTOR_MISALIGN_SUPPORTED 1 > - > /* The maximmum LMUL according to user configuration. */ > #define TARGET_MAX_LMUL \ > (int) (rvv_max_lmul == RVV_DYNAMIC ? RVV_M8 : rvv_max_lmul) > diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc > index 85df5b7ab49..cfdeb56559f 100644 > --- a/gcc/config/riscv/riscv.cc > +++ b/gcc/config/riscv/riscv.cc > @@ -287,6 +287,7 @@ struct riscv_tune_param > unsigned short memory_cost; > unsigned short fmv_cost; > bool slow_unaligned_access; > + bool rvv_unaligned_access; > bool use_divmod_expansion; > bool overlap_op_by_pieces; > unsigned int fusible_ops; > @@ -299,6 +300,10 @@ struct riscv_tune_param > /* Whether unaligned accesses execute very slowly. */ > bool riscv_slow_unaligned_access_p; > > +/* Whether misaligned vector accesses are supported (i.e. do not > + throw an exception). 
*/ > +bool riscv_rvv_unaligned_access_p; > + > /* Whether user explicitly passed -mstrict-align. */ > bool riscv_user_wants_strict_align; > > @@ -441,6 +446,7 @@ static const struct riscv_tune_param rocket_tune_info = { > 5, /* memory_cost */ > 8, /* fmv_cost */ > true, /* slow_unaligned_access */ > + false, /* rvv_unaligned_access */ > false, /* use_divmod_expansion */ > false, /* overlap_op_by_pieces */ > RISCV_FUSE_NOTHING, /* fusible_ops */ > @@ -459,6 +465,7 @@ static const struct riscv_tune_param sifive_7_tune_info = { > 3, /* memory_cost */ > 8, /* fmv_cost */ > true, /* slow_unaligned_access */ > + false, /* rvv_unaligned_access */ > false, /* use_divmod_expansion */ > false, /* overlap_op_by_pieces */ > RISCV_FUSE_NOTHING, /* fusible_ops */ > @@ -477,6 +484,7 @@ static const struct riscv_tune_param sifive_p400_tune_info = { > 3, /* memory_cost */ > 4, /* fmv_cost */ > true, /* slow_unaligned_access */ > + false, /* rvv_unaligned_access */ > false, /* use_divmod_expansion */ > false, /* overlap_op_by_pieces */ > RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */ > @@ -495,6 +503,7 @@ static const struct riscv_tune_param sifive_p600_tune_info = { > 3, /* memory_cost */ > 4, /* fmv_cost */ > true, /* slow_unaligned_access */ > + false, /* rvv_unaligned_access */ > false, /* use_divmod_expansion */ > false, /* overlap_op_by_pieces */ > RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */ > @@ -513,6 +522,7 @@ static const struct riscv_tune_param thead_c906_tune_info = { > 5, /* memory_cost */ > 8, /* fmv_cost */ > false, /* slow_unaligned_access */ > + false, /* rvv_unaligned_access */ > false, /* use_divmod_expansion */ > false, /* overlap_op_by_pieces */ > RISCV_FUSE_NOTHING, /* fusible_ops */ > @@ -531,6 +541,7 @@ static const struct riscv_tune_param xiangshan_nanhu_tune_info = { > 3, /* memory_cost */ > 3, /* fmv_cost */ > true, /* slow_unaligned_access */ > + false, /* rvv_unaligned_access */ > false, /* use_divmod_expansion */ > 
false, /* overlap_op_by_pieces */ > RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH, /* fusible_ops */ > @@ -549,6 +560,7 @@ static const struct riscv_tune_param generic_ooo_tune_info = { > 4, /* memory_cost */ > 4, /* fmv_cost */ > false, /* slow_unaligned_access */ > + true, /* rvv_unaligned_access */ > false, /* use_divmod_expansion */ > true, /* overlap_op_by_pieces */ > RISCV_FUSE_NOTHING, /* fusible_ops */ > @@ -567,6 +579,7 @@ static const struct riscv_tune_param optimize_size_tune_info = { > 2, /* memory_cost */ > 8, /* fmv_cost */ > false, /* slow_unaligned_access */ > + false, /* rvv_unaligned_access */ > false, /* use_divmod_expansion */ > false, /* overlap_op_by_pieces */ > RISCV_FUSE_NOTHING, /* fusible_ops */ > @@ -9536,6 +9549,11 @@ riscv_override_options_internal (struct gcc_options *opts) > riscv_slow_unaligned_access_p = (cpu->tune_param->slow_unaligned_access > || TARGET_STRICT_ALIGN); > > + /* Use -mtune's setting to determine whether unaligned vector accesses > + are supported. */ > + riscv_rvv_unaligned_access_p = (cpu->tune_param->rvv_unaligned_access > + && !TARGET_STRICT_ALIGN); > + > /* Make a note if user explicitly passed -mstrict-align for later > builtin macro generation. Can't use target_flags_explicitly since > it is set even for -mno-strict-align. */ > diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h > index d6b14c4d620..691d0a6fc57 100644 > --- a/gcc/config/riscv/riscv.h > +++ b/gcc/config/riscv/riscv.h > @@ -934,6 +934,11 @@ extern enum riscv_cc get_riscv_cc (const rtx use); > || (riscv_microarchitecture == sifive_p400) \ > || (riscv_microarchitecture == sifive_p600)) > > +/* True if the target supports misaligned vector loads and stores. */ > +#define TARGET_VECTOR_MISALIGN_SUPPORTED \ > + (rvv_allow_misalign == 1 \ > + || riscv_rvv_unaligned_access_p) > + > #define LOGICAL_OP_NON_SHORT_CIRCUIT 0 > > /* Control the assembler format that we output. 
*/ > @@ -1161,6 +1166,7 @@ while (0) > #ifndef USED_FOR_TARGET > extern const enum reg_class riscv_regno_to_class[]; > extern bool riscv_slow_unaligned_access_p; > +extern bool riscv_rvv_unaligned_access_p; > extern bool riscv_user_wants_strict_align; > extern unsigned riscv_stack_boundary; > extern unsigned riscv_bytes_per_vector_chunk; > diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt > index 87f58332016..0162429ee8f 100644 > --- a/gcc/config/riscv/riscv.opt > +++ b/gcc/config/riscv/riscv.opt > @@ -628,3 +628,8 @@ Specify TLS dialect. > mfence-tso > Target Var(TARGET_FENCE_TSO) Init(1) > Specifies whether the fence.tso instruction should be used. > + > +mrvv-allow-misalign > +Target Var(rvv_allow_misalign) Init(0) > +Allow the creation of element-misaligned vector loads and stores irrespective > +of the current uarch. The default is off. > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index c9d8f6b37b6..0586caa52a0 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -31163,6 +31163,11 @@ Generate little-endian code. This is the default when GCC is configured for a > @samp{riscv64-*-*} or @samp{riscv32-*-*} but not a @samp{riscv64be-*-*} or > @samp{riscv32be-*-*} target. > > +@opindex mrvv-allow-misalign > +@item -mrvv-allow-misalign > +Allow the creation of element-misaligned vector loads and stores irrespective > +of the current uarch. The default is off. 
> + > @opindex mstack-protector-guard > @opindex mstack-protector-guard-reg > @opindex mstack-protector-guard-offset > diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c > index 49ea3c2cf72..4b72c6a4d20 100644 > --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c > +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mrvv-max-lmul=dynamic" } */ > +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mrvv-max-lmul=dynamic -mrvv-allow-misalign" } */ > > int > x264_pixel_8x8 (unsigned char *pix1, unsigned char *pix2, int i_stride_pix2) > diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c > index 144479324d7..c31e2fbe12e 100644 > --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c > +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m4 -fno-schedule-insns -fno-schedule-insns2" } */ > +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m4 -fno-schedule-insns -fno-schedule-insns2 -mrvv-allow-misalign" } */ > > #include <stdint-gcc.h> > > diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c > index 13ae8bd3bcf..a6381368b81 100644 > --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c > +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m8 -fno-schedule-insns -fno-schedule-insns2" } */ > +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m8 
-fno-schedule-insns -fno-schedule-insns2 -mrvv-allow-misalign" } */ > > #include <stdint-gcc.h> > > diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c > index 1f9fa48264e..f29a94fffe4 100644 > --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c > +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=dynamic -fno-schedule-insns -fno-schedule-insns2" } */ > +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=dynamic -fno-schedule-insns -fno-schedule-insns2 -mrvv-allow-misalign" } */ > > #include <stdint-gcc.h> > > diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c > index ea6a7cbe2b1..f60e54d2cb8 100644 > --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c > +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ > +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-allow-misalign" } */ > > #include <stdint-gcc.h> > > diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c > index cb4abeca989..b84256811c7 100644 > --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c > +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m2" } */ > +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m2 -mrvv-allow-misalign" } */ > > #include <stdint-gcc.h> > > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/misalign-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/misalign-1.c > index 
1a076cbcd0f..9c11d897371 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/misalign-1.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/misalign-1.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -mrvv-max-lmul=m4 -fno-tree-loop-distribute-patterns" } */ > +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -mrvv-max-lmul=m4 -fno-tree-loop-distribute-patterns -mrvv-allow-misalign" } */ > > #include <stdlib.h> > > diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp > index f0f6da52275..ebb908f5c8f 100644 > --- a/gcc/testsuite/lib/target-supports.exp > +++ b/gcc/testsuite/lib/target-supports.exp > @@ -2034,7 +2034,7 @@ proc check_effective_target_riscv_zvfh_ok { } { > # check if we can execute vector insns with the given hardware or > # simulator > set gcc_march [regsub {[[:alnum:]]*} [riscv_get_arch] &v] > - if { [check_runtime ${gcc_march}_exec { > + if { [check_runtime ${gcc_march}_zvfh_exec { > int main() > { > asm ("vsetivli zero,8,e16,m1,ta,ma"); > @@ -2047,6 +2047,29 @@ proc check_effective_target_riscv_zvfh_ok { } { > return 0 > } > > +# Return 1 if we can load a vector from a 1-byte aligned address. > + > +proc check_effective_target_riscv_v_misalign_ok { } { > + > + if { ![check_effective_target_riscv_v_ok] } { > + return 0 > + } > + > + set gcc_march [riscv_get_arch] > + if { [check_runtime ${gcc_march}_misalign_exec { > + int main() { > + unsigned char a[16] > + = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; > + asm ("vsetivli zero,7,e8,m1,ta,ma"); > + asm ("addi a7,%0,1" : : "r" (a) : "a7" ); > + asm ("vle8.v v8,0(a7)" : : : "v8"); > + return 0; } } "-march=${gcc_march}"] } { > + return 1 > + } > + > + return 0 > +} > + > proc riscv_get_arch { } { > set gcc_march "" > # ??? do we neeed to add more extensions to the list below? 
> @@ -8139,7 +8162,6 @@ proc check_effective_target_vect_hw_misalign { } { > || ([istarget mips*-*-*] && [et-is-effective-target mips_msa]) > || ([istarget s390*-*-*] > && [check_effective_target_s390_vx]) > - || ([istarget riscv*-*-*]) > || ([istarget loongarch*-*-*]) > || [istarget amdgcn*-*-*] } { > return 1 > @@ -8148,6 +8170,11 @@ proc check_effective_target_vect_hw_misalign { } { > && ![check_effective_target_arm_vect_no_misalign] } { > return 1 > } > + if { [istarget riscv*-*-*] > + && [check_effective_target_riscv_v_misalign_ok] } { > + return 1 > + } > + > return 0 > }] > } > @@ -11565,6 +11592,9 @@ proc check_vect_support_and_set_flags { } { > } elseif [istarget riscv*-*-*] { > if [check_effective_target_riscv_v] { > set dg-do-what-default run > + if [check_effective_target_riscv_v_misalign_ok] { > + lappend DEFAULT_VECTCFLAGS "-mrvv-allow-misalign" > + } > } else { > foreach item [add_options_for_riscv_v ""] { > lappend DEFAULT_VECTCFLAGS $item
> * -mstrict-align: Both scalar and vector misaligned accesses are > unsupported (-mrvv-allow-misalign doesn't matter). I'm not sure if > there's hardware there, but given we have systems that don't support > scalar misaligned accesses it seems reasonable to assume they'll also > not support vector misaligned accesses. As a data point, and contrary to what I said/hoped before: There are examples where -mstrict-align and -mrvv-allow-misalign vectorize code and produce unaligned vector accesses. I haven't looked into that area of the vectorizer for a while but it doesn't appear as if we regard STRICT_ALIGNMENT there at all. We keep track of the known misalignments (via peeling etc.) and either handle them via movmisalign or give up. Same for unknown misalignment but all unaffected by -mstrict-align. We could have -mrvv-allow-misalign have an "| STRICT_ALIGNMENT" to get to the behavior you described but right now it's not like that. And AFAICT -mstrict-align behaves the same way for other targets, regardless of whether they support unaligned vector accesses or not. So, right now, I'd tend towards describing that both flags are independent and affect either only scalar or only vector code. Maybe we should rename the whole thing to -mrvv-strict-align? Might make it even more confusing, though. Regards Robin
On 5/24/24 11:14 AM, Palmer Dabbelt wrote: > On Fri, 24 May 2024 09:19:09 PDT (-0700), Robin Dapp wrote: >>> We should have something in doc/invoke too, this one is going to be >>> tricky for users. We'll also have to define how this interacts with >>> the existing -mstrict-align. >> >> Addressed the rest in the attached v2 which also fixes tests. >> I'm really not sure about -mstrict-align. I would have hoped that using >> -mstrict-align we'd never run into any movmisalign situation but that >> might be wishful thinking. Do we need to specify an >> interaction, though? For now the new options disables movmisalign so >> if we hit that despite -mstrict-align we'd still not vectorize it. > > I think we just need to write it down. I think there's two ways to > encode this: either we treat scalar and vector as independent, or we > couple them. If we treat them independently then we end up with four > cases, it's not clear if they're all interesting. IIUC with this patch > we'd be able to encode Given the ISA documents them as independent, I think we should follow suit and allow them to vary independently. > > * -mstrict-align: Both scalar and vector misaligned accesses are > unsupported (-mrvv-allow-misalign doesn't matter). I'm not sure if > there's hardware there, but given we have systems that don't support > scalar misaligned accesses it seems reasonable to assume they'll also > not support vector misaligned accesses. > * -mno-strict-align -mno-rvv-allow-misalign: Scalar misaligned are > supported, vector misaligned aren't supported. This matches our best > theory of how the k230 and k1 behave, so it also seems reasonable to > support. > * -mno-strict-align -mrvv-allow-misalign: Both scalar and vector > misaligned accesses are supported. This seems reasonable to support > as it's how I'd hope big cores end up being designed, though again > there's no hardware. 
I'd almost lean towards -m[no-]scalar-strict-align and -m[no-]vector-strict-align and deprecate -mstrict-align (aliasing it to the scalar alignment option). But I'll go with consensus here. > > The fourth case is kind of wacky: scalar misaligned is unsupported, > vector misaligned is supported. I'm not really sure why we'd end up > with a system like that, but HW vendors do wacky things so it's kind of > hard to predict. I've worked on one of these :-) The thinking from the designers was unaligned scalar access just wasn't that important, particularly with mem* and str* using the vector rather than scalar ops. jeff
On Fri, 24 May 2024 16:31:48 PDT (-0700), jeffreyalaw@gmail.com wrote: > > > On 5/24/24 11:14 AM, Palmer Dabbelt wrote: >> On Fri, 24 May 2024 09:19:09 PDT (-0700), Robin Dapp wrote: >>>> We should have something in doc/invoke too, this one is going to be >>>> tricky for users. We'll also have to define how this interacts with >>>> the existing -mstrict-align. >>> >>> Addressed the rest in the attached v2 which also fixes tests. >>> I'm really not sure about -mstrict-align. I would have hoped that using >>> -mstrict-align we'd never run into any movmisalign situation but that >>> might be wishful thinking. Do we need to specify an >>> interaction, though? For now the new options disables movmisalign so >>> if we hit that despite -mstrict-align we'd still not vectorize it. >> >> I think we just need to write it down. I think there's two ways to >> encode this: either we treat scalar and vector as independent, or we >> couple them. If we treat them independently then we end up with four >> cases, it's not clear if they're all interesting. IIUC with this patch >> we'd be able to encode > Given the ISA documents them as independent, I think we should follow > suit and allow them to vary independently. I'm only reading Zicclsm as saying both scalar and vector misaligned accesses are supported, but nothing about the performance. >> * -mstrict-align: Both scalar and vector misaligned accesses are >> unsupported (-mrvv-allow-misalign doesn't matter). I'm not sure if >> there's hardware there, but given we have systems that don't support >> scalar misaligned accesses it seems reasonable to assume they'll also >> not support vector misaligned accesses. >> * -mno-strict-align -mno-rvv-allow-misalign: Scalar misaligned are >> supported, vector misaligned aren't supported. This matches our best >> theory of how the k230 and k1 behave, so it also seems reasonable to >> support. 
>> * -mno-strict-align -mrvv-allow-misalign: Both scalar and vector >> misaligned accesses are supported. This seems reasonable to support >> as it's how I'd hope big cores end up being designed, though again >> there's no hardware. > I'd almost lean towards -m[no-]scalar-strict-align and > -m[no-]vector-strict-align and deprecate -mstrict-align (aliasing it to > the scalar alignment option). But I'll go with consensus here. Seems reasonable to me. Just having a regular naming scheme for the scalar/vector makes it clear what we're doing, and it's not like having the extra name for -mscalar-strict-align really costs anything. >> The fourth case is kind of wacky: scalar misaligned is unsupported, >> vector misaligned is supported. I'm not really sure why we'd end up >> with a system like that, but HW vendors do wacky things so it's kind of >> hard to predict. > I've worked on one of these :-) The thinking from the designers was > unaligned scalar access just wasn't that important, particularly with > mem* and str* using the vector rather than scalar ops. OK then ;)
On 5/24/24 5:39 PM, Palmer Dabbelt wrote: > On Fri, 24 May 2024 16:31:48 PDT (-0700), jeffreyalaw@gmail.com wrote: >> >> >> On 5/24/24 11:14 AM, Palmer Dabbelt wrote: >>> On Fri, 24 May 2024 09:19:09 PDT (-0700), Robin Dapp wrote: >>>>> We should have something in doc/invoke too, this one is going to be >>>>> tricky for users. We'll also have to define how this interacts with >>>>> the existing -mstrict-align. >>>> >>>> Addressed the rest in the attached v2 which also fixes tests. >>>> I'm really not sure about -mstrict-align. I would have hoped that >>>> using >>>> -mstrict-align we'd never run into any movmisalign situation but that >>>> might be wishful thinking. Do we need to specify an >>>> interaction, though? For now the new options disables movmisalign so >>>> if we hit that despite -mstrict-align we'd still not vectorize it. >>> >>> I think we just need to write it down. I think there's two ways to >>> encode this: either we treat scalar and vector as independent, or we >>> couple them. If we treat them independently then we end up with four >>> cases, it's not clear if they're all interesting. IIUC with this patch >>> we'd be able to encode >> Given the ISA documents them as independent, I think we should follow >> suit and allow them to vary independently. > > I'm only reading Zicclsm as saying both scalar and vector misaligned > accesses are supported, but nothing about the performance. I think it was in the vector docs. It didn't say anything about performance, just a note that scalar & vector behavior could differ. > > > Seems reasonable to me. Just having a regular naming scheme for the > scalar/vector makes it clear what we're doing, and it's not like having > the extra name for -mscalar-strict-align really costs anything. That was my thinking -- get the names right should help avoid confusion. Jeff
On Fri, 24 May 2024 16:41:39 PDT (-0700), jeffreyalaw@gmail.com wrote: > > > On 5/24/24 5:39 PM, Palmer Dabbelt wrote: >> On Fri, 24 May 2024 16:31:48 PDT (-0700), jeffreyalaw@gmail.com wrote: >>> >>> >>> On 5/24/24 11:14 AM, Palmer Dabbelt wrote: >>>> On Fri, 24 May 2024 09:19:09 PDT (-0700), Robin Dapp wrote: >>>>>> We should have something in doc/invoke too, this one is going to be >>>>>> tricky for users. We'll also have to define how this interacts with >>>>>> the existing -mstrict-align. >>>>> >>>>> Addressed the rest in the attached v2 which also fixes tests. >>>>> I'm really not sure about -mstrict-align. I would have hoped that >>>>> using >>>>> -mstrict-align we'd never run into any movmisalign situation but that >>>>> might be wishful thinking. Do we need to specify an >>>>> interaction, though? For now the new options disables movmisalign so >>>>> if we hit that despite -mstrict-align we'd still not vectorize it. >>>> >>>> I think we just need to write it down. I think there's two ways to >>>> encode this: either we treat scalar and vector as independent, or we >>>> couple them. If we treat them independently then we end up with four >>>> cases, it's not clear if they're all interesting. IIUC with this patch >>>> we'd be able to encode >>> Given the ISA documents them as independent, I think we should follow >>> suit and allow them to vary independently. >> >> I'm only reading Zicclsm as saying both scalar and vector misaligned >> accesses are supported, but nothing about the performance. > I think it was in the vector docs. It didn't say anything about > performance, just a note that scalar & vector behavior could differ. Either way, the split naming scheme seems clearer to me. It also avoids getting mixed up by the no-scalar-misaligned, yes-vector-misaligned systems if they ever show up. So if Robin's OK with re-spinning things, let's just go that way? >> Seems reasonable to me. 
Just having a regular naming scheme for the >> scalar/vector makes it clear what we're doing, and it's not like having >> the extra name for -mscalar-strict-align really costs anything. > That was my thinking -- get the names right should help avoid confusion. > > Jeff
On 5/24/24 5:43 PM, Palmer Dabbelt wrote: >>> >>> I'm only reading Zicclsm as saying both scalar and vector misaligned >>> accesses are supported, but nothing about the performance. >> I think it was in the vector docs. It didn't say anything about >> performance, just a note that scalar & vector behavior could differ. > > Either way, the split naming scheme seems clearer to me. It also avoids > getting mixed up by the no-scalar-misaligned, yes-vector-misaligned > systems if they ever show up. > > So if Robin's OK with re-spinning things, let's just go that way? Works for me. Hopefully he's offline until Monday as it's rather late for him :-) So we'll pick it back up in the Tuesday meeting. jeff
On Fri, 24 May 2024 16:50:52 PDT (-0700), jeffreyalaw@gmail.com wrote: > > > On 5/24/24 5:43 PM, Palmer Dabbelt wrote: > >>>> >>>> I'm only reading Zicclsm as saying both scalar and vector misaligned >>>> accesses are supported, but nothing about the performance. >>> I think it was in the vector docs. It didn't say anything about >>> performance, just a note that scalar & vector behavior could differ. >> >> Either way, the split naming scheme seems clearer to me. It also avoids >> getting mixed up by the no-scalar-misaligned, yes-vector-misaligned >> systems if they ever show up. >> >> So if Robin's OK with re-spinning things, let's just go that way? > Works for me. Hopefully he's offline until Monday as it's rather late > for him :-) So we'll pick it back up in the Tuesday meeting. Cool, no rush on my end. > > jeff
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h index 1b2dd5757a8..f58a07abffc 100644 --- a/gcc/config/riscv/riscv-opts.h +++ b/gcc/config/riscv/riscv-opts.h @@ -147,9 +147,6 @@ enum rvv_vector_bits_enum { ? 0 \ : 32 << (__builtin_popcount (opts->x_riscv_zvl_flags) - 1)) -/* TODO: Enable RVV movmisalign by default for now. */ -#define TARGET_VECTOR_MISALIGN_SUPPORTED 1 - /* The maximmum LMUL according to user configuration. */ #define TARGET_MAX_LMUL \ (int) (rvv_max_lmul == RVV_DYNAMIC ? RVV_M8 : rvv_max_lmul) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 85df5b7ab49..cfdeb56559f 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -287,6 +287,7 @@ struct riscv_tune_param unsigned short memory_cost; unsigned short fmv_cost; bool slow_unaligned_access; + bool rvv_unaligned_access; bool use_divmod_expansion; bool overlap_op_by_pieces; unsigned int fusible_ops; @@ -299,6 +300,10 @@ struct riscv_tune_param /* Whether unaligned accesses execute very slowly. */ bool riscv_slow_unaligned_access_p; +/* Whether misaligned vector accesses are supported (i.e. do not + throw an exception). */ +bool riscv_rvv_unaligned_access_p; + /* Whether user explicitly passed -mstrict-align. 
*/ bool riscv_user_wants_strict_align; @@ -441,6 +446,7 @@ static const struct riscv_tune_param rocket_tune_info = { 5, /* memory_cost */ 8, /* fmv_cost */ true, /* slow_unaligned_access */ + false, /* rvv_unaligned_access */ false, /* use_divmod_expansion */ false, /* overlap_op_by_pieces */ RISCV_FUSE_NOTHING, /* fusible_ops */ @@ -459,6 +465,7 @@ static const struct riscv_tune_param sifive_7_tune_info = { 3, /* memory_cost */ 8, /* fmv_cost */ true, /* slow_unaligned_access */ + false, /* rvv_unaligned_access */ false, /* use_divmod_expansion */ false, /* overlap_op_by_pieces */ RISCV_FUSE_NOTHING, /* fusible_ops */ @@ -477,6 +484,7 @@ static const struct riscv_tune_param sifive_p400_tune_info = { 3, /* memory_cost */ 4, /* fmv_cost */ true, /* slow_unaligned_access */ + false, /* rvv_unaligned_access */ false, /* use_divmod_expansion */ false, /* overlap_op_by_pieces */ RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */ @@ -495,6 +503,7 @@ static const struct riscv_tune_param sifive_p600_tune_info = { 3, /* memory_cost */ 4, /* fmv_cost */ true, /* slow_unaligned_access */ + false, /* rvv_unaligned_access */ false, /* use_divmod_expansion */ false, /* overlap_op_by_pieces */ RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */ @@ -513,6 +522,7 @@ static const struct riscv_tune_param thead_c906_tune_info = { 5, /* memory_cost */ 8, /* fmv_cost */ false, /* slow_unaligned_access */ + false, /* rvv_unaligned_access */ false, /* use_divmod_expansion */ false, /* overlap_op_by_pieces */ RISCV_FUSE_NOTHING, /* fusible_ops */ @@ -531,6 +541,7 @@ static const struct riscv_tune_param xiangshan_nanhu_tune_info = { 3, /* memory_cost */ 3, /* fmv_cost */ true, /* slow_unaligned_access */ + false, /* rvv_unaligned_access */ false, /* use_divmod_expansion */ false, /* overlap_op_by_pieces */ RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH, /* fusible_ops */ @@ -549,6 +560,7 @@ static const struct riscv_tune_param generic_ooo_tune_info = { 4, /* memory_cost */ 4, /* 
fmv_cost */ false, /* slow_unaligned_access */ + true, /* rvv_unaligned_access */ false, /* use_divmod_expansion */ true, /* overlap_op_by_pieces */ RISCV_FUSE_NOTHING, /* fusible_ops */ @@ -567,6 +579,7 @@ static const struct riscv_tune_param optimize_size_tune_info = { 2, /* memory_cost */ 8, /* fmv_cost */ false, /* slow_unaligned_access */ + false, /* rvv_unaligned_access */ false, /* use_divmod_expansion */ false, /* overlap_op_by_pieces */ RISCV_FUSE_NOTHING, /* fusible_ops */ @@ -9536,6 +9549,11 @@ riscv_override_options_internal (struct gcc_options *opts) riscv_slow_unaligned_access_p = (cpu->tune_param->slow_unaligned_access || TARGET_STRICT_ALIGN); + /* Use -mtune's setting to determine whether unaligned vector accesses + are supported. */ + riscv_rvv_unaligned_access_p = (cpu->tune_param->rvv_unaligned_access + && !TARGET_STRICT_ALIGN); + /* Make a note if user explicitly passed -mstrict-align for later builtin macro generation. Can't use target_flags_explicitly since it is set even for -mno-strict-align. */ diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index d6b14c4d620..691d0a6fc57 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -934,6 +934,11 @@ extern enum riscv_cc get_riscv_cc (const rtx use); || (riscv_microarchitecture == sifive_p400) \ || (riscv_microarchitecture == sifive_p600)) +/* True if the target supports misaligned vector loads and stores. */ +#define TARGET_VECTOR_MISALIGN_SUPPORTED \ + (rvv_allow_misalign == 1 \ + || riscv_rvv_unaligned_access_p) + #define LOGICAL_OP_NON_SHORT_CIRCUIT 0 /* Control the assembler format that we output. 
*/ @@ -1161,6 +1166,7 @@ while (0) #ifndef USED_FOR_TARGET extern const enum reg_class riscv_regno_to_class[]; extern bool riscv_slow_unaligned_access_p; +extern bool riscv_rvv_unaligned_access_p; extern bool riscv_user_wants_strict_align; extern unsigned riscv_stack_boundary; extern unsigned riscv_bytes_per_vector_chunk; diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt index 87f58332016..0162429ee8f 100644 --- a/gcc/config/riscv/riscv.opt +++ b/gcc/config/riscv/riscv.opt @@ -628,3 +628,8 @@ Specify TLS dialect. mfence-tso Target Var(TARGET_FENCE_TSO) Init(1) Specifies whether the fence.tso instruction should be used. + +mrvv-allow-misalign +Target Var(rvv_allow_misalign) Init(0) +Allow the creation of element-misaligned vector loads and stores irrespective +of the current uarch. The default is off. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index c9d8f6b37b6..0586caa52a0 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -31163,6 +31163,11 @@ Generate little-endian code. This is the default when GCC is configured for a @samp{riscv64-*-*} or @samp{riscv32-*-*} but not a @samp{riscv64be-*-*} or @samp{riscv32be-*-*} target. +@opindex mrvv-allow-misalign +@item -mrvv-allow-misalign +Allow the creation of element-misaligned vector loads and stores irrespective +of the current uarch. The default is off. 
+ @opindex mstack-protector-guard @opindex mstack-protector-guard-reg @opindex mstack-protector-guard-offset diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c index 49ea3c2cf72..4b72c6a4d20 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mrvv-max-lmul=dynamic" } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mrvv-max-lmul=dynamic -mrvv-allow-misalign" } */ int x264_pixel_8x8 (unsigned char *pix1, unsigned char *pix2, int i_stride_pix2) diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c index 144479324d7..c31e2fbe12e 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m4 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m4 -fno-schedule-insns -fno-schedule-insns2 -mrvv-allow-misalign" } */ #include <stdint-gcc.h> diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c index 13ae8bd3bcf..a6381368b81 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m8 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m8 -fno-schedule-insns -fno-schedule-insns2 -mrvv-allow-misalign" } */ #include 
<stdint-gcc.h> diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c index 1f9fa48264e..f29a94fffe4 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=dynamic -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=dynamic -fno-schedule-insns -fno-schedule-insns2 -mrvv-allow-misalign" } */ #include <stdint-gcc.h> diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c index ea6a7cbe2b1..f60e54d2cb8 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-allow-misalign" } */ #include <stdint-gcc.h> diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c index cb4abeca989..b84256811c7 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m2" } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m2 -mrvv-allow-misalign" } */ #include <stdint-gcc.h> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/misalign-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/misalign-1.c index 1a076cbcd0f..9c11d897371 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/misalign-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/misalign-1.c 
@@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -mrvv-max-lmul=m4 -fno-tree-loop-distribute-patterns" } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -mrvv-max-lmul=m4 -fno-tree-loop-distribute-patterns -mrvv-allow-misalign" } */ #include <stdlib.h> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index f0f6da52275..ebb908f5c8f 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -2034,7 +2034,7 @@ proc check_effective_target_riscv_zvfh_ok { } { # check if we can execute vector insns with the given hardware or # simulator set gcc_march [regsub {[[:alnum:]]*} [riscv_get_arch] &v] - if { [check_runtime ${gcc_march}_exec { + if { [check_runtime ${gcc_march}_zvfh_exec { int main() { asm ("vsetivli zero,8,e16,m1,ta,ma"); @@ -2047,6 +2047,29 @@ proc check_effective_target_riscv_zvfh_ok { } { return 0 } +# Return 1 if we can load a vector from a 1-byte aligned address. + +proc check_effective_target_riscv_v_misalign_ok { } { + + if { ![check_effective_target_riscv_v_ok] } { + return 0 + } + + set gcc_march [regsub {[[:alnum:]]*} [riscv_get_arch] &v] + if { [check_runtime ${gcc_march}_misalign_exec { + int main() { + unsigned char a[16] + = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + asm ("vsetivli zero,7,e8,m1,ta,ma"); + asm ("addi a7,%0,1" : : "r" (a) : "a7" ); + asm ("vle8.v v8,0(a7)" : : : "v8"); + return 0; } } "-march=${gcc_march}"] } { + return 1 + } + + return 0 +} + proc riscv_get_arch { } { set gcc_march "" # ??? do we neeed to add more extensions to the list below? 
@@ -8139,7 +8162,6 @@ proc check_effective_target_vect_hw_misalign { } { || ([istarget mips*-*-*] && [et-is-effective-target mips_msa]) || ([istarget s390*-*-*] && [check_effective_target_s390_vx]) - || ([istarget riscv*-*-*]) || ([istarget loongarch*-*-*]) || [istarget amdgcn*-*-*] } { return 1 @@ -8148,6 +8170,11 @@ proc check_effective_target_vect_hw_misalign { } { && ![check_effective_target_arm_vect_no_misalign] } { return 1 } + if { [istarget riscv*-*-*] + && [check_effective_target_riscv_v_misalign_ok] } { + return 1 + } + return 0 }] } @@ -11565,6 +11592,9 @@ proc check_vect_support_and_set_flags { } { } elseif [istarget riscv*-*-*] { if [check_effective_target_riscv_v] { set dg-do-what-default run + if [check_effective_target_riscv_v_misalign_ok] { + lappend DEFAULT_VECTCFLAGS "-mrvv-allow-misalign" + } } else { foreach item [add_options_for_riscv_v ""] { lappend DEFAULT_VECTCFLAGS $item