Message ID | 20240226142235.3215553-1-pan2.li@intel.com |
---|---|
State | New |
Headers | show |
Series | [v2] DSE: Bugfix ICE after allow vector type in get_stored_val | expand |
On Mon, Feb 26, 2024 at 3:22 PM <pan2.li@intel.com> wrote: > > From: Pan Li <pan2.li@intel.com> > > We allowed vector type for get_stored_val when read is less than or > equal to store in previous. Unfortunately, we missed to adjust the > validate_subreg part accordingly. When the vector type's size is > less than vector register, it will be considered as invalid in the > validate_subreg. > > Consider the validate_subreg is kind of a can with worms and we are > in stage 4. We will fix the issue from the DES side, and make sure > the subreg is valid for both the read_mode and store_mode before > perform the real gen_lowpart. > > The below test are passed for this patch: > > * The x86 bootstrap test. > * The x86 regression test. > * The riscv regression test. > * The aarch64 regression test. > > gcc/ChangeLog: > > * dse.cc (get_stored_val): Add validate_subreg check before > perform the gen_lowpart for rtl. > > gcc/testsuite/ChangeLog: > > * gcc.dg/tree-ssa/ssa-fre-44.c: Add compile option to trigger > the ICE. > * gcc.target/riscv/rvv/base/bug-6.c: New test. > > Signed-off-by: Pan Li <pan2.li@intel.com> > --- > gcc/dse.cc | 4 +++- > gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c | 2 +- > .../gcc.target/riscv/rvv/base/bug-6.c | 22 +++++++++++++++++++ > 3 files changed, 26 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c > > diff --git a/gcc/dse.cc b/gcc/dse.cc > index edc7a1dfecf..1596da91da0 100644 > --- a/gcc/dse.cc > +++ b/gcc/dse.cc > @@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode read_mode, > copy_rtx (store_info->const_rhs)); > else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode) > && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode)) > - && targetm.modes_tieable_p (read_mode, store_mode)) > + && targetm.modes_tieable_p (read_mode, store_mode) > + && validate_subreg (read_mode, store_mode, copy_rtx (store_info->rhs), > + subreg_lowpart_offset (read_mode, store_mode))) > read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs)); Thanks for the 2nd try. I'll note the above uses gen_lowpart but validate_subreg which is sort-of a mismatch? But I'll leave this for review to people with more knowledge in this area. Jeff? Richard? Thanks, Richard. > else > read_reg = extract_low_bits (read_mode, store_mode, > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c > index f79b4c142ae..624a00a4f32 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O -fdump-tree-fre1" } */ > +/* { dg-options "-O -fdump-tree-fre1 -O3 -ftree-vectorize" } */ > > struct A { float x, y; }; > struct B { struct A u; }; > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c > new file mode 100644 > index 00000000000..5bb00b8f587 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c > @@ -0,0 +1,22 @@ > +/* Test that we do not have ice when compile */ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize" } */ > + > +struct A { float x, y; }; > +struct B { struct A u; }; > + > +extern void bar (struct A *); > + > +float > +f3 (struct B *x, int y) > +{ > + struct A p = {1.0f, 2.0f}; > + struct A *q = &x[y].u; > + > + __builtin_memcpy (&q->x, &p.x, sizeof (float)); > + __builtin_memcpy (&q->y, &p.y, sizeof (float)); > + > + bar (&p); > + > + return x[y].u.x + x[y].u.y; > +} > -- > 2.34.1 >
On 2/26/24 07:22, pan2.li@intel.com wrote: > From: Pan Li <pan2.li@intel.com> > > We allowed vector type for get_stored_val when read is less than or > equal to store in previous. Unfortunately, we missed to adjust the > validate_subreg part accordingly. When the vector type's size is > less than vector register, it will be considered as invalid in the > validate_subreg. > > Consider the validate_subreg is kind of a can with worms and we are > in stage 4. We will fix the issue from the DES side, and make sure > the subreg is valid for both the read_mode and store_mode before > perform the real gen_lowpart. > > The below test are passed for this patch: > > * The x86 bootstrap test. > * The x86 regression test. > * The riscv regression test. > * The aarch64 regression test. > > gcc/ChangeLog: > > * dse.cc (get_stored_val): Add validate_subreg check before > perform the gen_lowpart for rtl. > > gcc/testsuite/ChangeLog: > > * gcc.dg/tree-ssa/ssa-fre-44.c: Add compile option to trigger > the ICE. > * gcc.target/riscv/rvv/base/bug-6.c: New test. > > Signed-off-by: Pan Li <pan2.li@intel.com> > --- > gcc/dse.cc | 4 +++- > gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c | 2 +- > .../gcc.target/riscv/rvv/base/bug-6.c | 22 +++++++++++++++++++ > 3 files changed, 26 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c > > diff --git a/gcc/dse.cc b/gcc/dse.cc > index edc7a1dfecf..1596da91da0 100644 > --- a/gcc/dse.cc > +++ b/gcc/dse.cc > @@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode read_mode, > copy_rtx (store_info->const_rhs)); > else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode) > && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode)) > - && targetm.modes_tieable_p (read_mode, store_mode)) > + && targetm.modes_tieable_p (read_mode, store_mode) > + && validate_subreg (read_mode, store_mode, copy_rtx (store_info->rhs), > + subreg_lowpart_offset (read_mode, store_mode))) > read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs)); > else > read_reg = extract_low_bits (read_mode, store_mode, So we're just changing whether or not we call gen_lowpart directly or go through extract_low_bits, which may in turn generate subreg, call gen_lowpart itself and a few other things. I'm guessing that extract_low_bits is going to return NULL in this case via this code (specifically the second test). > if (!targetm.modes_tieable_p (src_int_mode, src_mode)) > return NULL_RTX; > if (!targetm.modes_tieable_p (int_mode, mode)) > return NULL_RTX; Pan, can you confirm what path we take through extract_low_bits? One might argue that we should just call into extract_low_bits unconditionally since it'll ultimately call gen_lowpart when it safely can. The downside is that's a bigger change than I'd like at this stage in our development cycle. I wouldn't be surprised if other direct uses of gen_lowpart have similar problems. > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c > index f79b4c142ae..624a00a4f32 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O -fdump-tree-fre1" } */ > +/* { dg-options "-O -fdump-tree-fre1 -O3 -ftree-vectorize" } */ > > struct A { float x, y; }; > struct B { struct A u; }; So this may compromise the original intent of this test. What I would suggest instead is to create a new test with the dg-do & dg-options you want with a #include "ssa-fre-44.c". So to move forward. Let's confirm the path we take through extract_low_bits matches expectations and fixup the testsuite change. Jeff
> Pan, can you confirm what path we take through extract_low_bits? Thanks Jeff for comments, will have a try soon and keep you posted. Pan -----Original Message----- From: Jeff Law <jeffreyalaw@gmail.com> Sent: Tuesday, February 27, 2024 11:03 PM To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; rdapp.gcc@gmail.com; Liu, Hongtao <hongtao.liu@intel.com> Subject: Re: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val On 2/26/24 07:22, pan2.li@intel.com wrote: > From: Pan Li <pan2.li@intel.com> > > We allowed vector type for get_stored_val when read is less than or > equal to store in previous. Unfortunately, we missed to adjust the > validate_subreg part accordingly. When the vector type's size is > less than vector register, it will be considered as invalid in the > validate_subreg. > > Consider the validate_subreg is kind of a can with worms and we are > in stage 4. We will fix the issue from the DES side, and make sure > the subreg is valid for both the read_mode and store_mode before > perform the real gen_lowpart. > > The below test are passed for this patch: > > * The x86 bootstrap test. > * The x86 regression test. > * The riscv regression test. > * The aarch64 regression test. > > gcc/ChangeLog: > > * dse.cc (get_stored_val): Add validate_subreg check before > perform the gen_lowpart for rtl. > > gcc/testsuite/ChangeLog: > > * gcc.dg/tree-ssa/ssa-fre-44.c: Add compile option to trigger > the ICE. > * gcc.target/riscv/rvv/base/bug-6.c: New test. > > Signed-off-by: Pan Li <pan2.li@intel.com> > --- > gcc/dse.cc | 4 +++- > gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c | 2 +- > .../gcc.target/riscv/rvv/base/bug-6.c | 22 +++++++++++++++++++ > 3 files changed, 26 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c > > diff --git a/gcc/dse.cc b/gcc/dse.cc > index edc7a1dfecf..1596da91da0 100644 > --- a/gcc/dse.cc > +++ b/gcc/dse.cc > @@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode read_mode, > copy_rtx (store_info->const_rhs)); > else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode) > && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode)) > - && targetm.modes_tieable_p (read_mode, store_mode)) > + && targetm.modes_tieable_p (read_mode, store_mode) > + && validate_subreg (read_mode, store_mode, copy_rtx (store_info->rhs), > + subreg_lowpart_offset (read_mode, store_mode))) > read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs)); > else > read_reg = extract_low_bits (read_mode, store_mode, So we're just changing whether or not we call gen_lowpart directly or go through extract_low_bits, which may in turn generate subreg, call gen_lowpart itself and a few other things. I'm guessing that extract_low_bits is going to return NULL in this case via this code (specifically the second test). > if (!targetm.modes_tieable_p (src_int_mode, src_mode)) > return NULL_RTX; > if (!targetm.modes_tieable_p (int_mode, mode)) > return NULL_RTX; Pan, can you confirm what path we take through extract_low_bits? One might argue that we should just call into extract_low_bits unconditionally since it'll ultimately call gen_lowpart when it safely can. The downside is that's a bigger change than I'd like at this stage in our development cycle. I wouldn't be surprised if other direct uses of gen_lowpart have similar problems. > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c > index f79b4c142ae..624a00a4f32 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O -fdump-tree-fre1" } */ > +/* { dg-options "-O -fdump-tree-fre1 -O3 -ftree-vectorize" } */ > > struct A { float x, y; }; > struct B { struct A u; }; So this may compromise the original intent of this test. What I would suggest instead is to create a new test with the dg-do & dg-options you want with a #include "ssa-fre-44.c". So to move forward. Let's confirm the path we take through extract_low_bits matches expectations and fixup the testsuite change. Jeff
> if (!targetm.modes_tieable_p (src_int_mode, src_mode)) > return NULL_RTX; > if (!targetm.modes_tieable_p (int_mode, mode)) > return NULL_RTX; Yes, will return NULL_RTX for in the first if, given src_int_mode is E_DImode while src_mode is E_V2SFmode and mode is E_V4QImode. The extract_low_bits convert the modes E_V2SFmode/E_V4QImode to E_DImode/E_SImode in advance before tieable checking, validate_subreg and gen_lowpart. Not sure if my understanding is correct but looks extract_low_bits cannot take care of vector modes up to a point because vector modes are always untieable to its' int mode, and then return NULL_RTX. Pan -----Original Message----- From: Li, Pan2 Sent: Wednesday, February 28, 2024 9:41 AM To: Jeff Law <jeffreyalaw@gmail.com>; gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; rdapp.gcc@gmail.com; Liu, Hongtao <Hongtao.Liu@intel.com> Subject: RE: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val > Pan, can you confirm what path we take through extract_low_bits? Thanks Jeff for comments, will have a try soon and keep you posted. Pan -----Original Message----- From: Jeff Law <jeffreyalaw@gmail.com> Sent: Tuesday, February 27, 2024 11:03 PM To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; rdapp.gcc@gmail.com; Liu, Hongtao <hongtao.liu@intel.com> Subject: Re: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val On 2/26/24 07:22, pan2.li@intel.com wrote: > From: Pan Li <pan2.li@intel.com> > > We allowed vector type for get_stored_val when read is less than or > equal to store in previous. Unfortunately, we missed to adjust the > validate_subreg part accordingly. When the vector type's size is > less than vector register, it will be considered as invalid in the > validate_subreg. > > Consider the validate_subreg is kind of a can with worms and we are > in stage 4. We will fix the issue from the DES side, and make sure > the subreg is valid for both the read_mode and store_mode before > perform the real gen_lowpart. > > The below test are passed for this patch: > > * The x86 bootstrap test. > * The x86 regression test. > * The riscv regression test. > * The aarch64 regression test. > > gcc/ChangeLog: > > * dse.cc (get_stored_val): Add validate_subreg check before > perform the gen_lowpart for rtl. > > gcc/testsuite/ChangeLog: > > * gcc.dg/tree-ssa/ssa-fre-44.c: Add compile option to trigger > the ICE. > * gcc.target/riscv/rvv/base/bug-6.c: New test. > > Signed-off-by: Pan Li <pan2.li@intel.com> > --- > gcc/dse.cc | 4 +++- > gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c | 2 +- > .../gcc.target/riscv/rvv/base/bug-6.c | 22 +++++++++++++++++++ > 3 files changed, 26 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c > > diff --git a/gcc/dse.cc b/gcc/dse.cc > index edc7a1dfecf..1596da91da0 100644 > --- a/gcc/dse.cc > +++ b/gcc/dse.cc > @@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode read_mode, > copy_rtx (store_info->const_rhs)); > else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode) > && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode)) > - && targetm.modes_tieable_p (read_mode, store_mode)) > + && targetm.modes_tieable_p (read_mode, store_mode) > + && validate_subreg (read_mode, store_mode, copy_rtx (store_info->rhs), > + subreg_lowpart_offset (read_mode, store_mode))) > read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs)); > else > read_reg = extract_low_bits (read_mode, store_mode, So we're just changing whether or not we call gen_lowpart directly or go through extract_low_bits, which may in turn generate subreg, call gen_lowpart itself and a few other things. I'm guessing that extract_low_bits is going to return NULL in this case via this code (specifically the second test). > if (!targetm.modes_tieable_p (src_int_mode, src_mode)) > return NULL_RTX; > if (!targetm.modes_tieable_p (int_mode, mode)) > return NULL_RTX; Pan, can you confirm what path we take through extract_low_bits? One might argue that we should just call into extract_low_bits unconditionally since it'll ultimately call gen_lowpart when it safely can. The downside is that's a bigger change than I'd like at this stage in our development cycle. I wouldn't be surprised if other direct uses of gen_lowpart have similar problems. > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c > index f79b4c142ae..624a00a4f32 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O -fdump-tree-fre1" } */ > +/* { dg-options "-O -fdump-tree-fre1 -O3 -ftree-vectorize" } */ > > struct A { float x, y; }; > struct B { struct A u; }; So this may compromise the original intent of this test. What I would suggest instead is to create a new test with the dg-do & dg-options you want with a #include "ssa-fre-44.c". So to move forward. Let's confirm the path we take through extract_low_bits matches expectations and fixup the testsuite change. Jeff
On 2/27/24 21:51, Li, Pan2 wrote: >> if (!targetm.modes_tieable_p (src_int_mode, src_mode)) >> return NULL_RTX; >> if (!targetm.modes_tieable_p (int_mode, mode)) >> return NULL_RTX; > > Yes, will return NULL_RTX for in the first if, given src_int_mode is E_DImode while src_mode is > E_V2SFmode and mode is E_V4QImode. The extract_low_bits convert the modes E_V2SFmode/E_V4QImode > to E_DImode/E_SImode in advance before tieable checking, validate_subreg and gen_lowpart. > > Not sure if my understanding is correct but looks extract_low_bits cannot take care of vector modes > up to a point because vector modes are always untieable to its' int mode, and then return NULL_RTX. Well, the code tries to turn the vector mode into a suitable integer mode via int_mode_for_mode. That takes a mode, including vector modes and tries to find an integer mode of the exact same size. So it's going to check if V2SF can be tied to DI and V4QI with SI. I suspect those are going to fail for RISC-V as those aren't tieable. Jeff
> So it's going to check if V2SF can be tied to DI and V4QI with SI. I > suspect those are going to fail for RISC-V as those aren't tieable. Yes, you are right. Different REG_CLASS are not allowed to be tieable in RISC-V. static bool riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2) { /* We don't allow different REG_CLASS modes tieable since it will cause ICE in register allocation (RA). E.g. V2SI and DI are not tieable. */ if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2)) return false; return (mode1 == mode2 || !(GET_MODE_CLASS (mode1) == MODE_FLOAT && GET_MODE_CLASS (mode2) == MODE_FLOAT)); } Pan -----Original Message----- From: Jeff Law <jeffreyalaw@gmail.com> Sent: Thursday, February 29, 2024 1:33 AM To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; rdapp.gcc@gmail.com; Liu, Hongtao <hongtao.liu@intel.com> Subject: Re: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val On 2/27/24 21:51, Li, Pan2 wrote: >> if (!targetm.modes_tieable_p (src_int_mode, src_mode)) >> return NULL_RTX; >> if (!targetm.modes_tieable_p (int_mode, mode)) >> return NULL_RTX; > > Yes, will return NULL_RTX for in the first if, given src_int_mode is E_DImode while src_mode is > E_V2SFmode and mode is E_V4QImode. The extract_low_bits convert the modes E_V2SFmode/E_V4QImode > to E_DImode/E_SImode in advance before tieable checking, validate_subreg and gen_lowpart. > > Not sure if my understanding is correct but looks extract_low_bits cannot take care of vector modes > up to a point because vector modes are always untieable to its' int mode, and then return NULL_RTX. Well, the code tries to turn the vector mode into a suitable integer mode via int_mode_for_mode. That takes a mode, including vector modes and tries to find an integer mode of the exact same size. So it's going to check if V2SF can be tied to DI and V4QI with SI. I suspect those are going to fail for RISC-V as those aren't tieable. Jeff
On 2/29/24 02:38, Li, Pan2 wrote: >> So it's going to check if V2SF can be tied to DI and V4QI with SI. I >> suspect those are going to fail for RISC-V as those aren't tieable. > > Yes, you are right. Different REG_CLASS are not allowed to be tieable in RISC-V. > > static bool > riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2) > { > /* We don't allow different REG_CLASS modes tieable since it > will cause ICE in register allocation (RA). > E.g. V2SI and DI are not tieable. */ > if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2)) > return false; > return (mode1 == mode2 > || !(GET_MODE_CLASS (mode1) == MODE_FLOAT > && GET_MODE_CLASS (mode2) == MODE_FLOAT)); > } Yes, but what we set tieable is e.g. V4QI and V2SF. I suggested a target band-aid before: diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 799d7919a4a..982ca1a4250 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -8208,6 +8208,11 @@ riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2) E.g. V2SI and DI are not tieable. */ if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2)) return false; + if (GET_MODE_CLASS (GET_MODE_INNER (mode1)) == MODE_INT + && GET_MODE_CLASS (GET_MODE_INNER (mode2)) == MODE_FLOAT + && GET_MODE_SIZE (GET_MODE_INNER (mode1)) + != GET_MODE_SIZE (GET_MODE_INNER (mode2))) + return false; return (mode1 == mode2 || !(GET_MODE_CLASS (mode1) == MODE_FLOAT && GET_MODE_CLASS (mode2) == MODE_FLOAT)); but I don't like that as it just works around something that I didn't even understand fully... Regards Robin
Yeah, talking about this with robin offline for this fix. > Yes, but what we set tieable is e.g. V4QI and V2SF. That comes from different code lines. Jeff would like to learn more about extract_low_bits, it will first convert to int_mode and then call the tieable_p. And I bet the V4QI and V2SF comes from the if condition for gen_lowpart. --- a/gcc/dse.cc +++ b/gcc/dse.cc @@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode read_mode, copy_rtx (store_info->const_rhs)); else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode) && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode)) - && targetm.modes_tieable_p (read_mode, store_mode)) // <= V4QI and V2SF here. + && targetm.modes_tieable_p (read_mode, store_mode) + && validate_subreg (read_mode, store_mode, copy_rtx (store_info->rhs), + subreg_lowpart_offset (read_mode, store_mode))) read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs)); else read_reg = extract_low_bits (read_mode, store_mode, Pan -----Original Message----- From: Robin Dapp <rdapp.gcc@gmail.com> Sent: Thursday, February 29, 2024 9:29 PM To: Li, Pan2 <pan2.li@intel.com>; Jeff Law <jeffreyalaw@gmail.com>; gcc-patches@gcc.gnu.org Cc: rdapp.gcc@gmail.com; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <hongtao.liu@intel.com> Subject: Re: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val On 2/29/24 02:38, Li, Pan2 wrote: >> So it's going to check if V2SF can be tied to DI and V4QI with SI. I >> suspect those are going to fail for RISC-V as those aren't tieable. > > Yes, you are right. Different REG_CLASS are not allowed to be tieable in RISC-V. > > static bool > riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2) > { > /* We don't allow different REG_CLASS modes tieable since it > will cause ICE in register allocation (RA). > E.g. V2SI and DI are not tieable. */ > if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2)) > return false; > return (mode1 == mode2 > || !(GET_MODE_CLASS (mode1) == MODE_FLOAT > && GET_MODE_CLASS (mode2) == MODE_FLOAT)); > } Yes, but what we set tieable is e.g. V4QI and V2SF. I suggested a target band-aid before: diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 799d7919a4a..982ca1a4250 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -8208,6 +8208,11 @@ riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2) E.g. V2SI and DI are not tieable. */ if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2)) return false; + if (GET_MODE_CLASS (GET_MODE_INNER (mode1)) == MODE_INT + && GET_MODE_CLASS (GET_MODE_INNER (mode2)) == MODE_FLOAT + && GET_MODE_SIZE (GET_MODE_INNER (mode1)) + != GET_MODE_SIZE (GET_MODE_INNER (mode2))) + return false; return (mode1 == mode2 || !(GET_MODE_CLASS (mode1) == MODE_FLOAT && GET_MODE_CLASS (mode2) == MODE_FLOAT)); but I don't like that as it just works around something that I didn't even understand fully... Regards Robin
On 2/29/24 06:28, Robin Dapp wrote: > On 2/29/24 02:38, Li, Pan2 wrote: >>> So it's going to check if V2SF can be tied to DI and V4QI with SI. I >>> suspect those are going to fail for RISC-V as those aren't tieable. >> >> Yes, you are right. Different REG_CLASS are not allowed to be tieable in RISC-V. >> >> static bool >> riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2) >> { >> /* We don't allow different REG_CLASS modes tieable since it >> will cause ICE in register allocation (RA). >> E.g. V2SI and DI are not tieable. */ >> if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2)) >> return false; >> return (mode1 == mode2 >> || !(GET_MODE_CLASS (mode1) == MODE_FLOAT >> && GET_MODE_CLASS (mode2) == MODE_FLOAT)); >> } > > Yes, but what we set tieable is e.g. V4QI and V2SF. But in the case of a vector modes, we can usually reinterpret the underlying bits in whatever mode we want and do any of the usual operations on those bits. In my mind that's fundamentally different than the int vs fp case. If we have an integer value in an FP register, we can't really operate on the value in any sensible way without first copying it over to the integer register file and vice-versa. Jeff
Thanks Jeff for comments. > But in the case of a vector modes, we can usually reinterpret the > underlying bits in whatever mode we want and do any of the usual > operations on those bits. Yes, I think that is why we can allow vector mode in get_stored_val if my understanding is correct. And then the different modes will return by gen_low_part. Unfortunately, there are some modes (less than a vector bit size like V2SF, V2QI for vlen=128) are considered as invalid by validate_subreg, and return NULL_RTX result in the final ICE. Thus, consider stage 4 I wonder if this is a acceptable fix, aka find some where to filter-out the invalid modes before goes to gen_low_part. Pan -----Original Message----- From: Jeff Law <jeffreyalaw@gmail.com> Sent: Monday, March 4, 2024 6:47 AM To: Robin Dapp <rdapp.gcc@gmail.com>; Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <hongtao.liu@intel.com> Subject: Re: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val On 2/29/24 06:28, Robin Dapp wrote: > On 2/29/24 02:38, Li, Pan2 wrote: >>> So it's going to check if V2SF can be tied to DI and V4QI with SI. I >>> suspect those are going to fail for RISC-V as those aren't tieable. >> >> Yes, you are right. Different REG_CLASS are not allowed to be tieable in RISC-V. >> >> static bool >> riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2) >> { >> /* We don't allow different REG_CLASS modes tieable since it >> will cause ICE in register allocation (RA). >> E.g. V2SI and DI are not tieable. */ >> if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2)) >> return false; >> return (mode1 == mode2 >> || !(GET_MODE_CLASS (mode1) == MODE_FLOAT >> && GET_MODE_CLASS (mode2) == MODE_FLOAT)); >> } > > Yes, but what we set tieable is e.g. V4QI and V2SF. But in the case of a vector modes, we can usually reinterpret the underlying bits in whatever mode we want and do any of the usual operations on those bits. In my mind that's fundamentally different than the int vs fp case. If we have an integer value in an FP register, we can't really operate on the value in any sensible way without first copying it over to the integer register file and vice-versa. Jeff
Hi Jeff, Is there any suggestion(s) for how to fix this ICE in the reasonable approach? Thanks a lot. Pan -----Original Message----- From: Li, Pan2 Sent: Tuesday, March 5, 2024 2:23 PM To: Jeff Law <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com>; gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <Hongtao.Liu@intel.com> Subject: RE: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val Thanks Jeff for comments. > But in the case of a vector modes, we can usually reinterpret the > underlying bits in whatever mode we want and do any of the usual > operations on those bits. Yes, I think that is why we can allow vector mode in get_stored_val if my understanding is correct. And then the different modes will return by gen_low_part. Unfortunately, there are some modes (less than a vector bit size like V2SF, V2QI for vlen=128) are considered as invalid by validate_subreg, and return NULL_RTX result in the final ICE. Thus, consider stage 4 I wonder if this is a acceptable fix, aka find some where to filter-out the invalid modes before goes to gen_low_part. Pan -----Original Message----- From: Jeff Law <jeffreyalaw@gmail.com> Sent: Monday, March 4, 2024 6:47 AM To: Robin Dapp <rdapp.gcc@gmail.com>; Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <hongtao.liu@intel.com> Subject: Re: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val On 2/29/24 06:28, Robin Dapp wrote: > On 2/29/24 02:38, Li, Pan2 wrote: >>> So it's going to check if V2SF can be tied to DI and V4QI with SI. I >>> suspect those are going to fail for RISC-V as those aren't tieable. >> >> Yes, you are right. Different REG_CLASS are not allowed to be tieable in RISC-V. >> >> static bool >> riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2) >> { >> /* We don't allow different REG_CLASS modes tieable since it >> will cause ICE in register allocation (RA). >> E.g. V2SI and DI are not tieable. */ >> if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2)) >> return false; >> return (mode1 == mode2 >> || !(GET_MODE_CLASS (mode1) == MODE_FLOAT >> && GET_MODE_CLASS (mode2) == MODE_FLOAT)); >> } > > Yes, but what we set tieable is e.g. V4QI and V2SF. But in the case of a vector modes, we can usually reinterpret the underlying bits in whatever mode we want and do any of the usual operations on those bits. In my mind that's fundamentally different than the int vs fp case. If we have an integer value in an FP register, we can't really operate on the value in any sensible way without first copying it over to the integer register file and vice-versa. Jeff
Sorry for disturbing, kindly ping for this ICE. Pan -----Original Message----- From: Li, Pan2 <pan2.li@intel.com> Sent: Tuesday, March 12, 2024 10:09 AM To: Jeff Law <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com>; gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <hongtao.liu@intel.com> Subject: RE: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val Hi Jeff, Is there any suggestion(s) for how to fix this ICE in the reasonable approach? Thanks a lot. Pan -----Original Message----- From: Li, Pan2 Sent: Tuesday, March 5, 2024 2:23 PM To: Jeff Law <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com>; gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <Hongtao.Liu@intel.com> Subject: RE: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val Thanks Jeff for comments. > But in the case of a vector modes, we can usually reinterpret the > underlying bits in whatever mode we want and do any of the usual > operations on those bits. Yes, I think that is why we can allow vector mode in get_stored_val if my understanding is correct. And then the different modes will return by gen_low_part. Unfortunately, there are some modes (less than a vector bit size like V2SF, V2QI for vlen=128) are considered as invalid by validate_subreg, and return NULL_RTX result in the final ICE. Thus, consider stage 4 I wonder if this is a acceptable fix, aka find some where to filter-out the invalid modes before goes to gen_low_part. Pan -----Original Message----- From: Jeff Law <jeffreyalaw@gmail.com> Sent: Monday, March 4, 2024 6:47 AM To: Robin Dapp <rdapp.gcc@gmail.com>; Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <hongtao.liu@intel.com> Subject: Re: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val On 2/29/24 06:28, Robin Dapp wrote: > On 2/29/24 02:38, Li, Pan2 wrote: >>> So it's going to check if V2SF can be tied to DI and V4QI with SI. I >>> suspect those are going to fail for RISC-V as those aren't tieable. >> >> Yes, you are right. Different REG_CLASS are not allowed to be tieable in RISC-V. >> >> static bool >> riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2) >> { >> /* We don't allow different REG_CLASS modes tieable since it >> will cause ICE in register allocation (RA). >> E.g. V2SI and DI are not tieable. */ >> if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2)) >> return false; >> return (mode1 == mode2 >> || !(GET_MODE_CLASS (mode1) == MODE_FLOAT >> && GET_MODE_CLASS (mode2) == MODE_FLOAT)); >> } > > Yes, but what we set tieable is e.g. V4QI and V2SF. But in the case of a vector modes, we can usually reinterpret the underlying bits in whatever mode we want and do any of the usual operations on those bits. In my mind that's fundamentally different than the int vs fp case. If we have an integer value in an FP register, we can't really operate on the value in any sensible way without first copying it over to the integer register file and vice-versa. Jeff
On 3/4/24 11:22 PM, Li, Pan2 wrote: > Thanks Jeff for comments. > >> But in the case of a vector modes, we can usually reinterpret the >> underlying bits in whatever mode we want and do any of the usual >> operations on those bits. > > Yes, I think that is why we can allow vector mode in get_stored_val if my understanding is correct. > And then the different modes will return by gen_low_part. Unfortunately, there are some modes > (less than a vector bit size like V2SF, V2QI for vlen=128) are considered as invalid by validate_subreg, > and return NULL_RTX result in the final ICE. That doesn't make a lot of sense to me. Even for vlen=128 I would have expected that we can still use a subreg to access low bits. After all we might have had a V16QI vector and done a reduction of some sort storing the result in the first element and we have to be able to extract that result and move it around. I'm not real keen on a target workaround. While extremely safe, I wouldn't be surprised if other ports could trigger the ICE and we'd end up patching up multiple targets for what is, IMHO, a more generic issue. As Richi noted using validate_subreg here isn't great. Does it work to factor out this code from extract_low_bits: > if (!int_mode_for_mode (src_mode).exists (&src_int_mode) > || !int_mode_for_mode (mode).exists (&int_mode)) > return NULL_RTX; > > if (!targetm.modes_tieable_p (src_int_mode, src_mode)) > return NULL_RTX; > if (!targetm.modes_tieable_p (int_mode, mode)) > return NULL_RTX; And use that in the condition (and in extract_low_bits rather than duplicating the code)? jeff ps. No need to apologize for the pings. This completely fell off my radar.
Thanks Jeff for comments. > As Richi noted using validate_subreg here isn't great. Does it work to > factor out this code from extract_low_bits > >> if (!int_mode_for_mode (src_mode).exists (&src_int_mode) >> || !int_mode_for_mode (mode).exists (&int_mode)) >> return NULL_RTX; >> >> if (!targetm.modes_tieable_p (src_int_mode, src_mode)) >> return NULL_RTX; >> if (!targetm.modes_tieable_p (int_mode, mode)) >> return NULL_RTX; > And use that in the condition (and in extract_low_bits rather than > duplicating the code)? It can solve the ICE but will forbid all vector modes goes gen_lowpart. Actually only the vector mode size is less than reg nature size will trigger the ICE. Thus, how about just add one more condition before goes to gen_lowpart as below? Feel free to correct me if any misunderstandings. 😉! diff --git a/gcc/dse.cc b/gcc/dse.cc index edc7a1dfecf..258d2ccc299 100644 --- a/gcc/dse.cc +++ b/gcc/dse.cc @@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode read_mode, copy_rtx (store_info->const_rhs)); else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode) && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode)) - && targetm.modes_tieable_p (read_mode, store_mode)) + && targetm.modes_tieable_p (read_mode, store_mode) + /* It's invalid in validate_subreg if read_mode size is < reg natural. */ + && known_ge (GET_MODE_SIZE (read_mode), REGMODE_NATURAL_SIZE (read_mode))) read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs)); else read_reg = extract_low_bits (read_mode, store_mode, Pan -----Original Message----- From: Jeff Law <jeffreyalaw@gmail.com> Sent: Saturday, March 23, 2024 2:54 AM To: Li, Pan2 <pan2.li@intel.com>; Robin Dapp <rdapp.gcc@gmail.com>; gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <hongtao.liu@intel.com> Subject: Re: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val On 3/4/24 11:22 PM, Li, Pan2 wrote: > Thanks Jeff for comments. > >> But in the case of a vector modes, we can usually reinterpret the >> underlying bits in whatever mode we want and do any of the usual >> operations on those bits. > > Yes, I think that is why we can allow vector mode in get_stored_val if my understanding is correct. > And then the different modes will return by gen_low_part. Unfortunately, there are some modes > (less than a vector bit size like V2SF, V2QI for vlen=128) are considered as invalid by validate_subreg, > and return NULL_RTX result in the final ICE. That doesn't make a lot of sense to me. Even for vlen=128 I would have expected that we can still use a subreg to access low bits. After all we might have had a V16QI vector and done a reduction of some sort storing the result in the first element and we have to be able to extract that result and move it around. I'm not real keen on a target workaround. While extremely safe, I wouldn't be surprised if other ports could trigger the ICE and we'd end up patching up multiple targets for what is, IMHO, a more generic issue. As Richi noted using validate_subreg here isn't great. Does it work to factor out this code from extract_low_bits: > if (!int_mode_for_mode (src_mode).exists (&src_int_mode) > || !int_mode_for_mode (mode).exists (&int_mode)) > return NULL_RTX; > > if (!targetm.modes_tieable_p (src_int_mode, src_mode)) > return NULL_RTX; > if (!targetm.modes_tieable_p (int_mode, mode)) > return NULL_RTX; And use that in the condition (and in extract_low_bits rather than duplicating the code)? jeff ps. No need to apologize for the pings. This completely fell off my radar.
Kindly ping for this ice. Pan -----Original Message----- From: Li, Pan2 <pan2.li@intel.com> Sent: Saturday, March 23, 2024 1:45 PM To: Jeff Law <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com>; gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <hongtao.liu@intel.com> Subject: RE: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val Thanks Jeff for comments. > As Richi noted using validate_subreg here isn't great. Does it work to > factor out this code from extract_low_bits > >> if (!int_mode_for_mode (src_mode).exists (&src_int_mode) >> || !int_mode_for_mode (mode).exists (&int_mode)) >> return NULL_RTX; >> >> if (!targetm.modes_tieable_p (src_int_mode, src_mode)) >> return NULL_RTX; >> if (!targetm.modes_tieable_p (int_mode, mode)) >> return NULL_RTX; > And use that in the condition (and in extract_low_bits rather than > duplicating the code)? It can solve the ICE but will forbid all vector modes goes gen_lowpart. Actually only the vector mode size is less than reg nature size will trigger the ICE. Thus, how about just add one more condition before goes to gen_lowpart as below? Feel free to correct me if any misunderstandings. 😉! diff --git a/gcc/dse.cc b/gcc/dse.cc index edc7a1dfecf..258d2ccc299 100644 --- a/gcc/dse.cc +++ b/gcc/dse.cc @@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode read_mode, copy_rtx (store_info->const_rhs)); else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode) && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode)) - && targetm.modes_tieable_p (read_mode, store_mode)) + && targetm.modes_tieable_p (read_mode, store_mode) + /* It's invalid in validate_subreg if read_mode size is < reg natural. */ + && known_ge (GET_MODE_SIZE (read_mode), REGMODE_NATURAL_SIZE (read_mode))) read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs)); else read_reg = extract_low_bits (read_mode, store_mode, Pan -----Original Message----- From: Jeff Law <jeffreyalaw@gmail.com> Sent: Saturday, March 23, 2024 2:54 AM To: Li, Pan2 <pan2.li@intel.com>; Robin Dapp <rdapp.gcc@gmail.com>; gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <hongtao.liu@intel.com> Subject: Re: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val On 3/4/24 11:22 PM, Li, Pan2 wrote: > Thanks Jeff for comments. > >> But in the case of a vector modes, we can usually reinterpret the >> underlying bits in whatever mode we want and do any of the usual >> operations on those bits. > > Yes, I think that is why we can allow vector mode in get_stored_val if my understanding is correct. > And then the different modes will return by gen_low_part. Unfortunately, there are some modes > (less than a vector bit size like V2SF, V2QI for vlen=128) are considered as invalid by validate_subreg, > and return NULL_RTX result in the final ICE. That doesn't make a lot of sense to me. Even for vlen=128 I would have expected that we can still use a subreg to access low bits. After all we might have had a V16QI vector and done a reduction of some sort storing the result in the first element and we have to be able to extract that result and move it around. I'm not real keen on a target workaround. While extremely safe, I wouldn't be surprised if other ports could trigger the ICE and we'd end up patching up multiple targets for what is, IMHO, a more generic issue. As Richi noted using validate_subreg here isn't great. Does it work to factor out this code from extract_low_bits: > if (!int_mode_for_mode (src_mode).exists (&src_int_mode) > || !int_mode_for_mode (mode).exists (&int_mode)) > return NULL_RTX; > > if (!targetm.modes_tieable_p (src_int_mode, src_mode)) > return NULL_RTX; > if (!targetm.modes_tieable_p (int_mode, mode)) > return NULL_RTX; And use that in the condition (and in extract_low_bits rather than duplicating the code)? jeff ps. No need to apologize for the pings. This completely fell off my radar.
Kindly ping^ for this ice fix. Pan -----Original Message----- From: Li, Pan2 Sent: Saturday, April 6, 2024 8:02 PM To: Li, Pan2 <pan2.li@intel.com>; Jeff Law <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com>; gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <Hongtao.Liu@intel.com> Subject: RE: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val Kindly ping for this ice. Pan -----Original Message----- From: Li, Pan2 <pan2.li@intel.com> Sent: Saturday, March 23, 2024 1:45 PM To: Jeff Law <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com>; gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <hongtao.liu@intel.com> Subject: RE: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val Thanks Jeff for comments. > As Richi noted using validate_subreg here isn't great. Does it work to > factor out this code from extract_low_bits > >> if (!int_mode_for_mode (src_mode).exists (&src_int_mode) >> || !int_mode_for_mode (mode).exists (&int_mode)) >> return NULL_RTX; >> >> if (!targetm.modes_tieable_p (src_int_mode, src_mode)) >> return NULL_RTX; >> if (!targetm.modes_tieable_p (int_mode, mode)) >> return NULL_RTX; > And use that in the condition (and in extract_low_bits rather than > duplicating the code)? It can solve the ICE but will forbid all vector modes goes gen_lowpart. Actually only the vector mode size is less than reg nature size will trigger the ICE. Thus, how about just add one more condition before goes to gen_lowpart as below? Feel free to correct me if any misunderstandings. 😉! diff --git a/gcc/dse.cc b/gcc/dse.cc index edc7a1dfecf..258d2ccc299 100644 --- a/gcc/dse.cc +++ b/gcc/dse.cc @@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode read_mode, copy_rtx (store_info->const_rhs)); else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode) && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode)) - && targetm.modes_tieable_p (read_mode, store_mode)) + && targetm.modes_tieable_p (read_mode, store_mode) + /* It's invalid in validate_subreg if read_mode size is < reg natural. */ + && known_ge (GET_MODE_SIZE (read_mode), REGMODE_NATURAL_SIZE (read_mode))) read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs)); else read_reg = extract_low_bits (read_mode, store_mode, Pan -----Original Message----- From: Jeff Law <jeffreyalaw@gmail.com> Sent: Saturday, March 23, 2024 2:54 AM To: Li, Pan2 <pan2.li@intel.com>; Robin Dapp <rdapp.gcc@gmail.com>; gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <hongtao.liu@intel.com> Subject: Re: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val On 3/4/24 11:22 PM, Li, Pan2 wrote: > Thanks Jeff for comments. > >> But in the case of a vector modes, we can usually reinterpret the >> underlying bits in whatever mode we want and do any of the usual >> operations on those bits. > > Yes, I think that is why we can allow vector mode in get_stored_val if my understanding is correct. > And then the different modes will return by gen_low_part. Unfortunately, there are some modes > (less than a vector bit size like V2SF, V2QI for vlen=128) are considered as invalid by validate_subreg, > and return NULL_RTX result in the final ICE. That doesn't make a lot of sense to me. Even for vlen=128 I would have expected that we can still use a subreg to access low bits. After all we might have had a V16QI vector and done a reduction of some sort storing the result in the first element and we have to be able to extract that result and move it around. I'm not real keen on a target workaround. While extremely safe, I wouldn't be surprised if other ports could trigger the ICE and we'd end up patching up multiple targets for what is, IMHO, a more generic issue. As Richi noted using validate_subreg here isn't great. Does it work to factor out this code from extract_low_bits: > if (!int_mode_for_mode (src_mode).exists (&src_int_mode) > || !int_mode_for_mode (mode).exists (&int_mode)) > return NULL_RTX; > > if (!targetm.modes_tieable_p (src_int_mode, src_mode)) > return NULL_RTX; > if (!targetm.modes_tieable_p (int_mode, mode)) > return NULL_RTX; And use that in the condition (and in extract_low_bits rather than duplicating the code)? jeff ps. No need to apologize for the pings. This completely fell off my radar.
Kindly ping^^ for this ice fix. Pan -----Original Message----- From: Li, Pan2 <pan2.li@intel.com> Sent: Thursday, April 18, 2024 9:46 AM To: Jeff Law <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com>; gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Liu, Hongtao <hongtao.liu@intel.com> Subject: RE: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val Kindly ping^ for this ice fix. Pan -----Original Message----- From: Li, Pan2 Sent: Saturday, April 6, 2024 8:02 PM To: Li, Pan2 <pan2.li@intel.com>; Jeff Law <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com>; gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <Hongtao.Liu@intel.com> Subject: RE: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val Kindly ping for this ice. Pan -----Original Message----- From: Li, Pan2 <pan2.li@intel.com> Sent: Saturday, March 23, 2024 1:45 PM To: Jeff Law <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com>; gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <hongtao.liu@intel.com> Subject: RE: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val Thanks Jeff for comments. > As Richi noted using validate_subreg here isn't great. Does it work to > factor out this code from extract_low_bits > >> if (!int_mode_for_mode (src_mode).exists (&src_int_mode) >> || !int_mode_for_mode (mode).exists (&int_mode)) >> return NULL_RTX; >> >> if (!targetm.modes_tieable_p (src_int_mode, src_mode)) >> return NULL_RTX; >> if (!targetm.modes_tieable_p (int_mode, mode)) >> return NULL_RTX; > And use that in the condition (and in extract_low_bits rather than > duplicating the code)? It can solve the ICE but will forbid all vector modes goes gen_lowpart. Actually only the vector mode size is less than reg nature size will trigger the ICE. Thus, how about just add one more condition before goes to gen_lowpart as below? Feel free to correct me if any misunderstandings. 😉! diff --git a/gcc/dse.cc b/gcc/dse.cc index edc7a1dfecf..258d2ccc299 100644 --- a/gcc/dse.cc +++ b/gcc/dse.cc @@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode read_mode, copy_rtx (store_info->const_rhs)); else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode) && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode)) - && targetm.modes_tieable_p (read_mode, store_mode)) + && targetm.modes_tieable_p (read_mode, store_mode) + /* It's invalid in validate_subreg if read_mode size is < reg natural. */ + && known_ge (GET_MODE_SIZE (read_mode), REGMODE_NATURAL_SIZE (read_mode))) read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs)); else read_reg = extract_low_bits (read_mode, store_mode, Pan -----Original Message----- From: Jeff Law <jeffreyalaw@gmail.com> Sent: Saturday, March 23, 2024 2:54 AM To: Li, Pan2 <pan2.li@intel.com>; Robin Dapp <rdapp.gcc@gmail.com>; gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <hongtao.liu@intel.com> Subject: Re: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val On 3/4/24 11:22 PM, Li, Pan2 wrote: > Thanks Jeff for comments. > >> But in the case of a vector modes, we can usually reinterpret the >> underlying bits in whatever mode we want and do any of the usual >> operations on those bits. > > Yes, I think that is why we can allow vector mode in get_stored_val if my understanding is correct. > And then the different modes will return by gen_low_part. Unfortunately, there are some modes > (less than a vector bit size like V2SF, V2QI for vlen=128) are considered as invalid by validate_subreg, > and return NULL_RTX result in the final ICE. That doesn't make a lot of sense to me. Even for vlen=128 I would have expected that we can still use a subreg to access low bits. After all we might have had a V16QI vector and done a reduction of some sort storing the result in the first element and we have to be able to extract that result and move it around. I'm not real keen on a target workaround. While extremely safe, I wouldn't be surprised if other ports could trigger the ICE and we'd end up patching up multiple targets for what is, IMHO, a more generic issue. As Richi noted using validate_subreg here isn't great. Does it work to factor out this code from extract_low_bits: > if (!int_mode_for_mode (src_mode).exists (&src_int_mode) > || !int_mode_for_mode (mode).exists (&int_mode)) > return NULL_RTX; > > if (!targetm.modes_tieable_p (src_int_mode, src_mode)) > return NULL_RTX; > if (!targetm.modes_tieable_p (int_mode, mode)) > return NULL_RTX; And use that in the condition (and in extract_low_bits rather than duplicating the code)? jeff ps. No need to apologize for the pings. This completely fell off my radar.
On 3/22/24 11:45 PM, Li, Pan2 wrote: > Thanks Jeff for comments. > >> As Richi noted using validate_subreg here isn't great. Does it work to >> factor out this code from extract_low_bits >> >>> if (!int_mode_for_mode (src_mode).exists (&src_int_mode) >>> || !int_mode_for_mode (mode).exists (&int_mode)) >>> return NULL_RTX; >>> >>> if (!targetm.modes_tieable_p (src_int_mode, src_mode)) >>> return NULL_RTX; >>> if (!targetm.modes_tieable_p (int_mode, mode)) >>> return NULL_RTX; > >> And use that in the condition (and in extract_low_bits rather than >> duplicating the code)? > > It can solve the ICE but will forbid all vector modes goes gen_lowpart. > Actually only the vector mode size is less than reg nature size will trigger the ICE. > Thus, how about just add one more condition before goes to gen_lowpart as below? > > Feel free to correct me if any misunderstandings. 😉! > > diff --git a/gcc/dse.cc b/gcc/dse.cc > index edc7a1dfecf..258d2ccc299 100644 > --- a/gcc/dse.cc > +++ b/gcc/dse.cc > @@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode read_mode, > copy_rtx (store_info->const_rhs)); > else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode) > && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode)) > - && targetm.modes_tieable_p (read_mode, store_mode)) > + && targetm.modes_tieable_p (read_mode, store_mode) > + /* It's invalid in validate_subreg if read_mode size is < reg natural. */ > + && known_ge (GET_MODE_SIZE (read_mode), REGMODE_NATURAL_SIZE (read_mode))) > read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs)); > else > read_reg = extract_low_bits (read_mode, store_mode, So how about this. I'll ack this for the trunk, but not for gcc-14 (at this time). We can revisit for gcc-14 after it's been on the trunk a bit. Realistically that likely means gcc-14.2 at the end of the summer rather than gcc-14.1 which is due in roughly a week. Jeff
> So how about this. I'll ack this for the trunk, but not for gcc-14 (at > this time). We can revisit for gcc-14 after it's been on the trunk a > bit. Realistically that likely means gcc-14.2 at the end of the summer > rather than gcc-14.1 which is due in roughly a week. Thanks Jeff, I will prepare a v3 for this with full and well tested results. Pan -----Original Message----- From: Jeff Law <jeffreyalaw@gmail.com> Sent: Monday, April 29, 2024 11:20 PM To: Li, Pan2 <pan2.li@intel.com>; Robin Dapp <rdapp.gcc@gmail.com>; gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Wang, Yanzhang <yanzhang.wang@intel.com>; Liu, Hongtao <hongtao.liu@intel.com> Subject: Re: [PATCH v2] DSE: Bugfix ICE after allow vector type in get_stored_val On 3/22/24 11:45 PM, Li, Pan2 wrote: > Thanks Jeff for comments. > >> As Richi noted using validate_subreg here isn't great. Does it work to >> factor out this code from extract_low_bits >> >>> if (!int_mode_for_mode (src_mode).exists (&src_int_mode) >>> || !int_mode_for_mode (mode).exists (&int_mode)) >>> return NULL_RTX; >>> >>> if (!targetm.modes_tieable_p (src_int_mode, src_mode)) >>> return NULL_RTX; >>> if (!targetm.modes_tieable_p (int_mode, mode)) >>> return NULL_RTX; > >> And use that in the condition (and in extract_low_bits rather than >> duplicating the code)? > > It can solve the ICE but will forbid all vector modes goes gen_lowpart. > Actually only the vector mode size is less than reg nature size will trigger the ICE. > Thus, how about just add one more condition before goes to gen_lowpart as below? > > Feel free to correct me if any misunderstandings. 😉! > > diff --git a/gcc/dse.cc b/gcc/dse.cc > index edc7a1dfecf..258d2ccc299 100644 > --- a/gcc/dse.cc > +++ b/gcc/dse.cc > @@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode read_mode, > copy_rtx (store_info->const_rhs)); > else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode) > && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode)) > - && targetm.modes_tieable_p (read_mode, store_mode)) > + && targetm.modes_tieable_p (read_mode, store_mode) > + /* It's invalid in validate_subreg if read_mode size is < reg natural. */ > + && known_ge (GET_MODE_SIZE (read_mode), REGMODE_NATURAL_SIZE (read_mode))) > read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs)); > else > read_reg = extract_low_bits (read_mode, store_mode, So how about this. I'll ack this for the trunk, but not for gcc-14 (at this time). We can revisit for gcc-14 after it's been on the trunk a bit. Realistically that likely means gcc-14.2 at the end of the summer rather than gcc-14.1 which is due in roughly a week. Jeff
diff --git a/gcc/dse.cc b/gcc/dse.cc index edc7a1dfecf..1596da91da0 100644 --- a/gcc/dse.cc +++ b/gcc/dse.cc @@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode read_mode, copy_rtx (store_info->const_rhs)); else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode) && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode)) - && targetm.modes_tieable_p (read_mode, store_mode)) + && targetm.modes_tieable_p (read_mode, store_mode) + && validate_subreg (read_mode, store_mode, copy_rtx (store_info->rhs), + subreg_lowpart_offset (read_mode, store_mode))) read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs)); else read_reg = extract_low_bits (read_mode, store_mode, diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c index f79b4c142ae..624a00a4f32 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O -fdump-tree-fre1" } */ +/* { dg-options "-O -fdump-tree-fre1 -O3 -ftree-vectorize" } */ struct A { float x, y; }; struct B { struct A u; }; diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c new file mode 100644 index 00000000000..5bb00b8f587 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c @@ -0,0 +1,22 @@ +/* Test that we do not have ice when compile */ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize" } */ + +struct A { float x, y; }; +struct B { struct A u; }; + +extern void bar (struct A *); + +float +f3 (struct B *x, int y) +{ + struct A p = {1.0f, 2.0f}; + struct A *q = &x[y].u; + + __builtin_memcpy (&q->x, &p.x, sizeof (float)); + __builtin_memcpy (&q->y, &p.y, sizeof (float)); + + bar (&p); + + return x[y].u.x + x[y].u.y; +}