diff mbox series

PR fortran/100950 - ICE in output_constructor_regular_field, at varasm.c:5514

Message ID trinity-558ac6fa-645e-4b69-bd92-ed980a8db626-1623274785526@3c-app-gmx-bs42
State New
Headers show
Series PR fortran/100950 - ICE in output_constructor_regular_field, at varasm.c:5514 | expand

Commit Message

Harald Anlauf June 9, 2021, 9:39 p.m. UTC
Dear Fortranners,

we should be able to simplify the length of a substring with known
constant bounds.  The attached patch adds this.

Regtested on x86_64-pc-linux-gnu.

OK for mainline?  Since this should be rather safe, to at least 11-branch?

Thanks,
Harald


Fortran - simplify length of substring with constant bounds

gcc/fortran/ChangeLog:

	PR fortran/100950
	* simplify.c (substring_has_constant_len): New.
	(gfc_simplify_len): Handle case of substrings with constant
	bounds.

gcc/testsuite/ChangeLog:

	PR fortran/100950
	* gfortran.dg/pr100950.f90: New test.

Comments

Bernhard Reutner-Fischer June 10, 2021, 10:24 a.m. UTC | #1
On Wed, 9 Jun 2021 23:39:45 +0200
Harald Anlauf via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:

> diff --git a/gcc/fortran/simplify.c b/gcc/fortran/simplify.c
> index c27b47aa98f..016ec259518 100644
> --- a/gcc/fortran/simplify.c
> +++ b/gcc/fortran/simplify.c
> @@ -4512,6 +4512,60 @@ gfc_simplify_leadz (gfc_expr *e)
>  }
> 
> 
> +/* Check for constant length of a substring.  */
> +
> +static bool
> +substring_has_constant_len (gfc_expr *e)
> +{
> +  ptrdiff_t istart, iend;
> +  size_t length;
> +  bool equal_length = false;
> +
> +  if (e->ts.type != BT_CHARACTER
> +      || !(e->ref && e->ref->type == REF_SUBSTRING)

iff we ever can get here with e->ref == NULL then the below will not
work too well. If so then maybe
  if (e->ts.type != BT_CHARACTER
      || ! e->ref
      || e->ref->type != REF_SUBSTRING

?

> +      || !e->ref->u.ss.start
> +      || e->ref->u.ss.start->expr_type != EXPR_CONSTANT
> +      || !e->ref->u.ss.end
> +      || e->ref->u.ss.end->expr_type != EXPR_CONSTANT
> +      || !e->ref->u.ss.length
> +      || !e->ref->u.ss.length->length
> +      || e->ref->u.ss.length->length->expr_type != EXPR_CONSTANT)
> +    return false;
> +
> +  /* Basic checks on substring starting and ending indices.  */
> +  if (!gfc_resolve_substring (e->ref, &equal_length))
> +    return false;
> +
> +  istart = gfc_mpz_get_hwi (e->ref->u.ss.start->value.integer);
> +  iend = gfc_mpz_get_hwi (e->ref->u.ss.end->value.integer);
> +  length = gfc_mpz_get_hwi (e->ref->u.ss.length->length->value.integer);
> +
> +  if (istart <= iend)
> +    {
> +      if (istart < 1)
> +	{
> +	  gfc_error ("Substring start index (%ld) at %L below 1",
> +		     (long) istart, &e->ref->u.ss.start->where);
> +	  return false;
> +	}
> +      if (iend > (ssize_t) length)
> +	{
> +	  gfc_error ("Substring end index (%ld) at %L exceeds string "
> +		     "length", (long) iend, &e->ref->u.ss.end->where);
> +	  return false;
> +	}
> +      length = iend - istart + 1;
> +    }
> +  else
> +    length = 0;
> +
> +  /* Fix substring length.  */
> +  e->value.character.length = length;
> +
> +  return true;
> +}
> +
> +
>  gfc_expr *
>  gfc_simplify_len (gfc_expr *e, gfc_expr *kind)
>  {
> @@ -4547,6 +4601,13 @@ gfc_simplify_len (gfc_expr *e, gfc_expr *kind)
>         of the unlimited polymorphic entity.  To get the _len component the last
>         _data ref needs to be stripped and a ref to the _len component added.  */
>      return gfc_get_len_component (e->symtree->n.sym->assoc->target, k);
> +  else if (substring_has_constant_len (e))
> +    {
> +      result = gfc_get_constant_expr (BT_INTEGER, k, &e->where);
> +      mpz_set_si (result->value.integer,
> +		  e->value.character.length);

I think the mpz_set_si args above fit on one line.

btw.. there's a commentary typo in add_init_expr_to_sym():
s/skeep/skip/

thanks,

> +      return range_check (result, "LEN");
> +    }
>    else
>      return NULL;
>  }
> diff --git a/gcc/testsuite/gfortran.dg/pr100950.f90 b/gcc/testsuite/gfortran.dg/pr100950.f90
> new file mode 100644
> index 00000000000..f06db45b0b4
> --- /dev/null
> +++ b/gcc/testsuite/gfortran.dg/pr100950.f90
> @@ -0,0 +1,18 @@
> +! { dg-do run }
> +! PR fortran/100950 - ICE in output_constructor_regular_field, at varasm.c:5514
> +
> +program p
> +  character(8), parameter :: u = "123"
> +  character(8)            :: x = "", s
> +  character(2)            :: w(2) = [character(len(x(3:4))) :: 'a','b' ]
> +  character(*), parameter :: y(*) = [character(len(u(3:4))) :: 'a','b' ]
> +  character(*), parameter :: z(*) = [character(len(x(3:4))) :: 'a','b' ]
> +  if (len (y) /= 2) stop 1
> +  if (len (z) /= 2) stop 2
> +  if (any (w /= y)) stop 3
> +  if (len ([character(len(u(3:4))) :: 'a','b' ]) /= 2)  stop 4
> +  if (len ([character(len(x(3:4))) :: 'a','b' ]) /= 2)  stop 5
> +  if (any ([character(len(x(3:4))) :: 'a','b' ]  /= y)) stop 6
> +  write(s,*) [character(len(x(3:4))) :: 'a','b' ]
> +  if (s /= " a b    ") stop 7
> +end
Bernhard Reutner-Fischer June 10, 2021, 12:47 p.m. UTC | #2
On Thu, 10 Jun 2021 12:24:35 +0200
Bernhard Reutner-Fischer <rep.dot.nop@gmail.com> wrote:

> On Wed, 9 Jun 2021 23:39:45 +0200
> Harald Anlauf via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:

> > +/* Check for constant length of a substring.  */
> > +
> > +static bool
> > +substring_has_constant_len (gfc_expr *e)
> > +{
> > +  ptrdiff_t istart, iend;
> > +  size_t length;
> > +  bool equal_length = false;
> > +
> > +  if (e->ts.type != BT_CHARACTER
> > +      || !(e->ref && e->ref->type == REF_SUBSTRING)  
> 
> iff we ever can get here with e->ref == NULL then the below will not
> work too well. If so then maybe
>   if (e->ts.type != BT_CHARACTER
>       || ! e->ref
>       || e->ref->type != REF_SUBSTRING
> 
> ?

Not sure what i was reading, maybe i read || instead of && in the
braced condition. Your initial version works equally well of course
although it's obviously harder to parse for at least some :)
thanks,
Harald Anlauf June 10, 2021, 6:52 p.m. UTC | #3
Hi Bernhard,

> > +static bool
> > +substring_has_constant_len (gfc_expr *e)
> > +{
> > +  ptrdiff_t istart, iend;
> > +  size_t length;
> > +  bool equal_length = false;
> > +
> > +  if (e->ts.type != BT_CHARACTER
> > +      || !(e->ref && e->ref->type == REF_SUBSTRING)
>
> iff we ever can get here with e->ref == NULL then the below will not
> work too well. If so then maybe
>   if (e->ts.type != BT_CHARACTER
>       || ! e->ref
>       || e->ref->type != REF_SUBSTRING
>
> ?

as you already realized, the logic was fine, but probably less
readable than your version.  I've changed the code accordingly.

> > +  else if (substring_has_constant_len (e))
> > +    {
> > +      result = gfc_get_constant_expr (BT_INTEGER, k, &e->where);
> > +      mpz_set_si (result->value.integer,
> > +		  e->value.character.length);
>
> I think the mpz_set_si args above fit on one line.

That's true.

Since this block is exactly the same as for constant strings,
which is handled in the first condition, I've thought some more
and am convinced now that these two can be fused.  Done now.

I've also added two cornercases to the testcase, and regtested again.

> btw.. there's a commentary typo in add_init_expr_to_sym():
> s/skeep/skip/

That is a completely unrelated issue in a different file, right?

Thanks for your constructive comments!

Harald
Bernhard Reutner-Fischer June 11, 2021, 6:02 a.m. UTC | #4
On 10 June 2021 20:52:12 CEST, Harald Anlauf <anlauf@gmx.de> wrote:

>> I think the mpz_set_si args above fit on one line.
>
>That's true.
>
>Since this block is exactly the same as for constant strings,
>which is handled in the first condition, I've thought some more
>and am convinced now that these two can be fused.  Done now.

Thanks.
Btw.. I know that we cast hwi to long int often and use %ld but think there is a HOST_WIDE_INT_PRINT_DEC format macro to print a hwi.

>I've also added two cornercases to the testcase, and regtested again.
>
>> btw.. there's a commentary typo in add_init_expr_to_sym():
>> s/skeep/skip/
>
>That is a completely unrelated issue in a different file, right?

Completely unrelated, yes.
>
>Thanks for your constructive comments!

thanks for the patch!
Harald Anlauf June 18, 2021, 8:47 p.m. UTC | #5
*PING*

> Gesendet: Mittwoch, 09. Juni 2021 um 23:39 Uhr
> Von: "Harald Anlauf" <anlauf@gmx.de>
> An: "fortran" <fortran@gcc.gnu.org>, "gcc-patches" <gcc-patches@gcc.gnu.org>
> Betreff: [PATCH] PR fortran/100950 - ICE in output_constructor_regular_field, at varasm.c:5514
>
> Dear Fortranners,
>
> we should be able to simplify the length of a substring with known
> constant bounds.  The attached patch adds this.
>
> Regtested on x86_64-pc-linux-gnu.
>
> OK for mainline?  Since this should be rather safe, to at least 11-branch?
>
> Thanks,
> Harald
>
>
> Fortran - simplify length of substring with constant bounds
>
> gcc/fortran/ChangeLog:
>
> 	PR fortran/100950
> 	* simplify.c (substring_has_constant_len): New.
> 	(gfc_simplify_len): Handle case of substrings with constant
> 	bounds.
>
> gcc/testsuite/ChangeLog:
>
> 	PR fortran/100950
> 	* gfortran.dg/pr100950.f90: New test.
>
>
Tobias Burnus June 21, 2021, 12:12 p.m. UTC | #6
Hi Harald,

sorry for being way behind my review duties :-(

On 10.06.21 20:52, Harald Anlauf via Fortran wrote:
> +static bool
> +substring_has_constant_len (gfc_expr *e)
> +{
> +  ptrdiff_t istart, iend;
> +  size_t length;
> +  bool equal_length = false;
> +
> +  if (e->ts.type != BT_CHARACTER
> +      || !e->ref
> +      || e->ref->type != REF_SUBSTRING

Is there a reason why you do not handle:

type t
   character(len=5) :: str1
   character(len=:), allocatable :: str2
end type
type(t) :: x

allocate(x%str2, source="abd")
if (len (x%str)) /= 1) ...
if (len (x%str2(1:2) /= 2) ...
etc.

Namely: Search the last_ref = expr->ref->next->next ...?
and then check that lastref?

   * * *

Slightly unrelated: I think the following does not violate
F2018's R916 / C923 – but is rejected, namely:
   R916  type-param-inquiry  is  designator % type-param-name
the latter is 'len' or 'kind' for intrinsic types. And:
   R901  designator is ...
                    or substring
But

character(len=5) :: str
print *, str(1:3)%len
end

fails with

     2 | print *, str(1:3)%len
       |                  1
Error: Syntax error in PRINT statement at (1)


Assuming you don't want to handle it, can you open a new PR?
Thanks!

  * * *

That's in so far related to your patch as last_ref would
then be the last ref before ref->next == NULL or
before ref->next->type == REF_INQUIRY

> +  istart = gfc_mpz_get_hwi (e->ref->u.ss.start->value.integer);
> +  iend = gfc_mpz_get_hwi (e->ref->u.ss.end->value.integer);
> +  length = gfc_mpz_get_hwi (e->ref->u.ss.length->length->value.integer);
> +
> +  if (istart <= iend)
> +    {
> +      if (istart < 1)
> +     {
> +       gfc_error ("Substring start index (%ld) at %L below 1",
> +                  (long) istart, &e->ref->u.ss.start->where);

As mentioned by Bernhard, you could use HOST_WIDE_INT_PRINT_DEC.

(It probably only matters on Windows which uses long == int = 32bit for
strings longer than INT_MAX.)

Thanks,

Tobias

-----------------
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank Thürauf
Harald Anlauf July 12, 2021, 9:23 p.m. UTC | #7
Hi Tobias,

> On 10.06.21 20:52, Harald Anlauf via Fortran wrote:
> > +static bool
> > +substring_has_constant_len (gfc_expr *e)
> > +{
> > +  ptrdiff_t istart, iend;
> > +  size_t length;
> > +  bool equal_length = false;
> > +
> > +  if (e->ts.type != BT_CHARACTER
> > +      || !e->ref
> > +      || e->ref->type != REF_SUBSTRING
> 
> Is there a reason why you do not handle:
> 
> type t
>    character(len=5) :: str1
>    character(len=:), allocatable :: str2
> end type
> type(t) :: x
> 
> allocate(x%str2, source="abd")
> if (len (x%str)) /= 1) ...
> if (len (x%str2(1:2) /= 2) ...
> etc.
> 
> Namely: Search the last_ref = expr->ref->next->next ...?
> and then check that lastref?

I was assuming that the argument passed to LEN() is already the ultimate
component for the case of substrings, and I was unable to find a case which
requires implementing that iteration.  The cases you provided do not seem
to apply here:

- derived type component str1, which is a string of given length, poses no
  problem.  I added a case to the testcase, see attached updated patch.

- derived type component str2 has deferred length.  I do not see that the
  simplification can be applied here, as the allocation could lead to str2
  being too short, and we do not want to simplify invalid code, such as:

type t
   character(len=:), allocatable :: str2
end type
type(t) :: x

allocate(x%str2, source="z")
if (len (x%str2(1:2)) /= 2) stop 1
end

If we want this to be catchable by bounds checking, we need to punt
at simplification of this.  The updated patch skips deferred strings.

>    * * *
> 
> Slightly unrelated: I think the following does not violate
> F2018's R916 / C923 – but is rejected, namely:
>    R916  type-param-inquiry  is  designator % type-param-name
> the latter is 'len' or 'kind' for intrinsic types. And:
>    R901  designator is ...
>                     or substring
> But
> 
> character(len=5) :: str
> print *, str(1:3)%len
> end
> 
> fails with
> 
>      2 | print *, str(1:3)%len
>        |                  1
> Error: Syntax error in PRINT statement at (1)
> 
> 
> Assuming you don't want to handle it, can you open a new PR?
> Thanks!

Good point.  I'd rather open a separate PR for this, though.

> > +  istart = gfc_mpz_get_hwi (e->ref->u.ss.start->value.integer);
> > +  iend = gfc_mpz_get_hwi (e->ref->u.ss.end->value.integer);
> > +  length = gfc_mpz_get_hwi (e->ref->u.ss.length->length->value.integer);
> > +
> > +  if (istart <= iend)
> > +    {
> > +      if (istart < 1)
> > +     {
> > +       gfc_error ("Substring start index (%ld) at %L below 1",
> > +                  (long) istart, &e->ref->u.ss.start->where);
> 
> As mentioned by Bernhard, you could use HOST_WIDE_INT_PRINT_DEC.
> 
> (It probably only matters on Windows which uses long == int = 32bit for
> strings longer than INT_MAX.)

I am not familiar enough with Windows.  What is HOST_WIDE_INT
on that system?  (As compared to e.g. size_t, ptrdiff_t).

The (slightly) updated patch regtests fine.

Thanks,
Harald
Harald Anlauf Aug. 3, 2021, 9:17 p.m. UTC | #8
Here's now my third attempt to fix this PR, taking into account
the comments by Tobias and Bernhard.

> > On 10.06.21 20:52, Harald Anlauf via Fortran wrote:
> > > +static bool
> > > +substring_has_constant_len (gfc_expr *e)
> > > +{
> > > +  ptrdiff_t istart, iend;
> > > +  size_t length;
> > > +  bool equal_length = false;
> > > +
> > > +  if (e->ts.type != BT_CHARACTER
> > > +      || !e->ref
> > > +      || e->ref->type != REF_SUBSTRING
> > 
> > Is there a reason why you do not handle:
> > 
> > type t
> >    character(len=5) :: str1
> >    character(len=:), allocatable :: str2
> > end type
> > type(t) :: x
> > 
> > allocate(x%str2, source="abd")
> > if (len (x%str)) /= 1) ...
> > if (len (x%str2(1:2) /= 2) ...
> > etc.
> > 
> > Namely: Search the last_ref = expr->ref->next->next ...?
> > and then check that lastref?

The mentioned search is now implemented.

Note, however, that gfc_simplify_len still won't handle neither
deferred strings nor their substrings.

I think there is nothing to simplify at compile time here.  Otherwise
there would be a conflict/inconsistency with type parameter inquiry,
see F2018:9.4.5(2):

"A deferred type parameter of a pointer that is not associated or
of an unallocated allocatable variable shall not be inquired about."

> >    * * *
> > 
> > Slightly unrelated: I think the following does not violate
> > F2018's R916 / C923 – but is rejected, namely:
> >    R916  type-param-inquiry  is  designator % type-param-name
> > the latter is 'len' or 'kind' for intrinsic types. And:
> >    R901  designator is ...
> >                     or substring
> > But
> > 
> > character(len=5) :: str
> > print *, str(1:3)%len
> > end
> > 
> > fails with
> > 
> >      2 | print *, str(1:3)%len
> >        |                  1
> > Error: Syntax error in PRINT statement at (1)
> > 
> > 
> > Assuming you don't want to handle it, can you open a new PR?
> > Thanks!

I tried to look into this, but there appear to be several unrelated
issues requiring a separate treatment.  I therefore opened:

  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101735

> > > +  istart = gfc_mpz_get_hwi (e->ref->u.ss.start->value.integer);
> > > +  iend = gfc_mpz_get_hwi (e->ref->u.ss.end->value.integer);
> > > +  length = gfc_mpz_get_hwi (e->ref->u.ss.length->length->value.integer);
> > > +
> > > +  if (istart <= iend)
> > > +    {
> > > +      if (istart < 1)
> > > +     {
> > > +       gfc_error ("Substring start index (%ld) at %L below 1",
> > > +                  (long) istart, &e->ref->u.ss.start->where);
> > 
> > As mentioned by Bernhard, you could use HOST_WIDE_INT_PRINT_DEC.
> > 
> > (It probably only matters on Windows which uses long == int = 32bit for
> > strings longer than INT_MAX.)

Done.

The updated patch regtests fine.  OK?

Thanks,
Harald


Fortran - simplify length of substring with constant bounds

gcc/fortran/ChangeLog:

	PR fortran/100950
	* simplify.c (substring_has_constant_len): New.
	(gfc_simplify_len): Handle case of substrings with constant
	bounds.

gcc/testsuite/ChangeLog:

	PR fortran/100950
	* gfortran.dg/pr100950.f90: New test.
Harald Anlauf Aug. 11, 2021, 7:28 p.m. UTC | #9
*Ping*

> Gesendet: Dienstag, 03. August 2021 um 23:17 Uhr
> Von: "Harald Anlauf" <anlauf@gmx.de>
> An: "Harald Anlauf" <anlauf@gmx.de>
> Cc: "Tobias Burnus" <Tobias_Burnus@mentor.com>, "Bernhard Reutner-Fischer" <rep.dot.nop@gmail.com>, "Harald Anlauf via Gcc-patches" <gcc-patches@gcc.gnu.org>, "fortran" <fortran@gcc.gnu.org>
> Betreff: Re: [PATCH] PR fortran/100950 - ICE in output_constructor_regular_field, at varasm.c:5514
>
> Here's now my third attempt to fix this PR, taking into account
> the comments by Tobias and Bernhard.
> 
> > > On 10.06.21 20:52, Harald Anlauf via Fortran wrote:
> > > > +static bool
> > > > +substring_has_constant_len (gfc_expr *e)
> > > > +{
> > > > +  ptrdiff_t istart, iend;
> > > > +  size_t length;
> > > > +  bool equal_length = false;
> > > > +
> > > > +  if (e->ts.type != BT_CHARACTER
> > > > +      || !e->ref
> > > > +      || e->ref->type != REF_SUBSTRING
> > > 
> > > Is there a reason why you do not handle:
> > > 
> > > type t
> > >    character(len=5) :: str1
> > >    character(len=:), allocatable :: str2
> > > end type
> > > type(t) :: x
> > > 
> > > allocate(x%str2, source="abd")
> > > if (len (x%str)) /= 1) ...
> > > if (len (x%str2(1:2) /= 2) ...
> > > etc.
> > > 
> > > Namely: Search the last_ref = expr->ref->next->next ...?
> > > and then check that lastref?
> 
> The mentioned search is now implemented.
> 
> Note, however, that gfc_simplify_len still won't handle neither
> deferred strings nor their substrings.
> 
> I think there is nothing to simplify at compile time here.  Otherwise
> there would be a conflict/inconsistency with type parameter inquiry,
> see F2018:9.4.5(2):
> 
> "A deferred type parameter of a pointer that is not associated or
> of an unallocated allocatable variable shall not be inquired about."
> 
> > >    * * *
> > > 
> > > Slightly unrelated: I think the following does not violate
> > > F2018's R916 / C923 – but is rejected, namely:
> > >    R916  type-param-inquiry  is  designator % type-param-name
> > > the latter is 'len' or 'kind' for intrinsic types. And:
> > >    R901  designator is ...
> > >                     or substring
> > > But
> > > 
> > > character(len=5) :: str
> > > print *, str(1:3)%len
> > > end
> > > 
> > > fails with
> > > 
> > >      2 | print *, str(1:3)%len
> > >        |                  1
> > > Error: Syntax error in PRINT statement at (1)
> > > 
> > > 
> > > Assuming you don't want to handle it, can you open a new PR?
> > > Thanks!
> 
> I tried to look into this, but there appear to be several unrelated
> issues requiring a separate treatment.  I therefore opened:
> 
>   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101735
> 
> > > > +  istart = gfc_mpz_get_hwi (e->ref->u.ss.start->value.integer);
> > > > +  iend = gfc_mpz_get_hwi (e->ref->u.ss.end->value.integer);
> > > > +  length = gfc_mpz_get_hwi (e->ref->u.ss.length->length->value.integer);
> > > > +
> > > > +  if (istart <= iend)
> > > > +    {
> > > > +      if (istart < 1)
> > > > +     {
> > > > +       gfc_error ("Substring start index (%ld) at %L below 1",
> > > > +                  (long) istart, &e->ref->u.ss.start->where);
> > > 
> > > As mentioned by Bernhard, you could use HOST_WIDE_INT_PRINT_DEC.
> > > 
> > > (It probably only matters on Windows which uses long == int = 32bit for
> > > strings longer than INT_MAX.)
> 
> Done.
> 
> The updated patch regtests fine.  OK?
> 
> Thanks,
> Harald
> 
> 
> Fortran - simplify length of substring with constant bounds
> 
> gcc/fortran/ChangeLog:
> 
> 	PR fortran/100950
> 	* simplify.c (substring_has_constant_len): New.
> 	(gfc_simplify_len): Handle case of substrings with constant
> 	bounds.
> 
> gcc/testsuite/ChangeLog:
> 
> 	PR fortran/100950
> 	* gfortran.dg/pr100950.f90: New test.
> 
>
Tobias Burnus Aug. 18, 2021, 10:22 a.m. UTC | #10
Hi Harald,

sorry for the belated review.

On 03.08.21 23:17, Harald Anlauf wrote:
>>> allocate(x%str2, source="abd")
>>> if (len (x%str)) /= 1) ...
>>> if (len (x%str2(1:2) /= 2) ...
>>> etc.
>>>
>>> Namely: Search the last_ref = expr->ref->next->next ...?
>>> and then check that lastref?
> The mentioned search is now implemented.
>
> Note, however, that gfc_simplify_len still won't handle neither
> deferred strings nor their substrings.
>
> I think there is nothing to simplify at compile time here.

Obviously, nonsubstrings cannot be simplified but I do not
see why  len(str(1:2))  cannot or should not be simplified.

(Not that I regard substring length inquiries as that common.)

Regarding:

> Otherwise
> there would be a conflict/inconsistency with type parameter inquiry,
> see F2018:9.4.5(2):
>
> "A deferred type parameter of a pointer that is not associated or
> of an unallocated allocatable variable shall not be inquired about."

That's a requirement for the user not to do:
   character(len=:), allocatable :: str
   n = len(str)  ! unallocated 'str'

which makes sense as 'len(str)' is not meaningful in this case and
the compiler might even access invalid memory in this case
and definitely undefined memory.

However, there is no reason why the user cannot do:
   if (allocated(str)) then
     n = len(str)
     m = len(str(5:8))
   end if
and why the compiler cannot replace the latter by 'm = 4'.

But, IMHO, the latter remark does _not_ imply that we
shall/must/have to accept code like:

if (allocated(str)) then
   block
      integer, parameter :: n = len(str(:5))
   end block
endif


> +static bool
> +substring_has_constant_len (gfc_expr *e)
> +{
> +  gfc_ref *ref;
> +  HOST_WIDE_INT istart, iend, length;
> +  bool equal_length = false;
> +
> +  if (e->ts.type != BT_CHARACTER || e->ts.deferred)
> +    return false;
cf. above.
> +
> +  for (ref = e->ref; ref; ref = ref->next)
> +    if (ref->type != REF_COMPONENT)
> +      break;
> +
> +  if (!ref
> +      || ref->type != REF_SUBSTRING
> +      || !ref->u.ss.start

With the caveat from above that len(<substring>) is rather special,
there is no real reason why:  str_array(:)(4:5)  cannot be handled.
(→ len = 2).

[I do note that "len(str(:5))" appears in your examples, hence,
I assume that ss.start is set in that case to '1'.]

> The updated patch regtests fine.  OK?
Looks good to me except for the caveats.

In particular:

  * * *

I think at least the array case should be handled.
On current mainline (i.e. without your patch),
I get (-fdump-tree-original)

   x = 5;  // Wrong - should be 1
   y = 1;  // OK

for

character(len=5) :: str2(4)
type t
    character(len=5) :: str(4)
end type t
type(t) :: var
integer :: x, y
x = len(var%str(:)(1:1))
y = len(str2(:)(1:1))
end

I don't know what's the result with your patch - but if
it is 'x = 5', it must be fixed.

  * * *

And while the following works

x = var%str(:)%len  ! ok, yields 5
y = str2(:)%len     ! ok, yields 5

the following is wrongly rejected:

x = var%str(:)(1:1)%len  ! Bogus: 'Invalid character in name'
y = str2(:)(1:1)%len     ! Bogus: 'Invalid character in name'

(likewise with '%kind')

(As "SUBSTRING % LEN", it also appears in the '16.9.99 INDEX',
but '9.4.5 Type parameter inquiry's 'R916 type-param-inquiry'
is the official reference.)

If you don't want to spend time on this subpart - you could
fill a PR.

  * * *

For deferred length, I have no strong opinion; in
any case, the upper substring bound > stringlen check
cannot be done in that case (at compile time). I think
I slightly prefer doing the optimization – but as is
is a special case and has some caveats (must be allocated,
upper bound check not possible, ...) I also see reasons
not to do it. Hence, it also can remain as in your patch.

Thanks,

Tobias

> Fortran - simplify length of substring with constant bounds
>
> gcc/fortran/ChangeLog:
>
>       PR fortran/100950
>       * simplify.c (substring_has_constant_len): New.
>       (gfc_simplify_len): Handle case of substrings with constant
>       bounds.
>
> gcc/testsuite/ChangeLog:
>
>       PR fortran/100950
>       * gfortran.dg/pr100950.f90: New test.

-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
Harald Anlauf Aug. 18, 2021, 9:01 p.m. UTC | #11
Hi Tobias,

> Gesendet: Mittwoch, 18. August 2021 um 12:22 Uhr
> Von: "Tobias Burnus" <tobias@codesourcery.com>

> > Note, however, that gfc_simplify_len still won't handle neither
> > deferred strings nor their substrings.
> >
> > I think there is nothing to simplify at compile time here.
> 
> Obviously, nonsubstrings cannot be simplified but I do not
> see why  len(str(1:2))  cannot or should not be simplified.
> 
> (Not that I regard substring length inquiries as that common.)

well, here's an example that Intel rejects:

  type u
     character(8)              :: s(4)
     character(:), allocatable :: str
  end type u
  type(u) :: q
  integer, parameter :: k2 = len (q% s(:)(3:4)) ! OK
  integer, parameter :: k3 = len (q% str (3:4)) ! Rejected by Intel
  print *, k2
  if (k2 /= 2) stop 2
  print *, k3
  if (k3 /= 2) stop 3
end

pr100950-ww.f90(7): error #6814: When using this inquiry function, the length of this object cannot be evaluated to a constant.   [LEN]
  integer, parameter :: k3 = len (q% str (3:4)) ! Rejected by Intel
-----------------------------^
pr100950-ww.f90(7): error #7169: Bad initialization expression.   [LEN]
  integer, parameter :: k3 = len (q% str (3:4)) ! Rejected by Intel
-----------------------------^

Of course we could accept it regardless what others do.
I have therefore removed the check for deferred length
in the attached patch (but read on).

> However, there is no reason why the user cannot do:
>    if (allocated(str)) then
>      n = len(str)
>      m = len(str(5:8))
>    end if
> and why the compiler cannot replace the latter by 'm = 4'.

Maybe you can enlighten me here.  I thought one of the
purposes of gfc_simplify_len is to evaluate constant
expressions.  Of course the length is constant, provided
bounds are respected.  Otherwise the result is, well, ...

(It will then eveluate at runtime, which I thought was fine).

> But, IMHO, the latter remark does _not_ imply that we
> shall/must/have to accept code like:
> 
> if (allocated(str)) then
>    block
>       integer, parameter :: n = len(str(:5))
>    end block
> endif

So shall we not simplify here (and thus reject it)?
This is important!  Or silently simplify and accept it?

> With the caveat from above that len(<substring>) is rather special,
> there is no real reason why:  str_array(:)(4:5)  cannot be handled.
> (→ len = 2).

Good point.  This is fixed in the revised patch and tested for.

> > The updated patch regtests fine.  OK?
> Looks good to me except for the caveats.

Regtested again.

>   * * *
> 
> And while the following works
> 
> x = var%str(:)%len  ! ok, yields 5
> y = str2(:)%len     ! ok, yields 5
> 
> the following is wrongly rejected:
> 
> x = var%str(:)(1:1)%len  ! Bogus: 'Invalid character in name'
> y = str2(:)(1:1)%len     ! Bogus: 'Invalid character in name'
> 
> (likewise with '%kind')
> 
> (As "SUBSTRING % LEN", it also appears in the '16.9.99 INDEX',
> but '9.4.5 Type parameter inquiry's 'R916 type-param-inquiry'
> is the official reference.)
> 
> If you don't want to spend time on this subpart - you could
> fill a PR.

Well, there's already

 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101735

which is a much better suited place for discussion.

>   * * *
> 
> For deferred length, I have no strong opinion; in
> any case, the upper substring bound > stringlen check
> cannot be done in that case (at compile time). I think
> I slightly prefer doing the optimization – but as is
> is a special case and has some caveats (must be allocated,
> upper bound check not possible, ...) I also see reasons
> not to do it. Hence, it also can remain as in your patch.

Actually, this is now an important point.  If we really want
to allow to handle substrings of deferred length strings
in constant expressions, the new patch would be fine,
otherwise I would have to reintroduce the hunk

+  if (e->ts.deferred)
+    return NULL;

and adjust the testcase.

Your choice.  See above.

Of course there may be more corner cases which I did not
think of...

Thanks,
Harald
Tobias Burnus Aug. 19, 2021, 7:52 a.m. UTC | #12
Hi Harald,

On 18.08.21 23:01, Harald Anlauf wrote:
>> Von: "Tobias Burnus"<tobias@codesourcery.com>
>>> Note, however, that gfc_simplify_len still won't handle neither
>>> deferred strings nor their substrings.
>> Obviously, nonsubstrings cannot be simplified but I do not
>> see why  len(str(1:2))  cannot or should not be simplified.
> well, here's an example that Intel rejects:
> ...
>       character(:), allocatable :: str
>    end type u
>    type(u) :: q
> ...
>    integer, parameter :: k3 = len (q% str (3:4)) ! Rejected by Intel
>
> pr100950-ww.f90(7): error #6814: When using this inquiry function, the length of this object cannot be evaluated to a constant.   [LEN]

I think the question is really how to interpret "10.1.12 Constant expression"

"(4) a specification inquiry where each designator or argument is
    ...
  (b) a variable whose properties inquired about are not
     (i) assumed,
     (ii) deferred, or
     (iii) defined by an expression that is not a constant expression,"

And as the substring bounds are constant expressions,
one can argue that (4)(b) is fulfilled as (i)–(iii) do not apply.

I am inclined to say that the Intel compiler has a bug by not
accepting it – but as written before, I regard sub-string length
(esp. with const expr) inquiries as an odd corner case which
is unlikely to occur in real-world code.

>> However, there is no reason why the user cannot do [...]
> Maybe you can enlighten me here.  [...]
I can't as I did not understand your question. However ...
>> But, IMHO, the latter remark does_not_  imply that we
>> shall/must/have to accept code like:
>>
>> if (allocated(str)) then
>>     block
>>        integer, parameter :: n = len(str(:5))
>>     end block
>> endif
> So shall we not simplify here (and thus reject it)?
> This is important!  Or silently simplify and accept it?

I tried to draw the line between simplification – to generate better code –
and 'constant expression' handling (accept where permitted, diagnose
non-standard-conforming code). — However, nearly but not quite always:
if it can be simplified to a constant the standard also regards it as
constant expression.

I think in for the purpose of the examples in this email thread,
we do not need to distinguish the two. — And can always simplify
deferred-length substrings where the substring bounds are const
expressions (or the lower-bound is absent and, hence, 1).

>> With the caveat from above that len(<substring>) is rather special,
>> there is no real reason why:  str_array(:)(4:5)  cannot be handled.
>> (→ len = 2).
> Good point.  This is fixed in the revised patch and tested for.

Still does not work – or rather: ...%t(:)(3:4) [i.e. substring with array section]
and ...%str(3:4) [i.e. substring of deferred-length scalar] both do work
but if one combines the two (→ ...%str2(:)(3:4), i.e. substring of deferred-length
array section), it does not:

Array ‘r’ at (1) is a variable, which does not reduce to a constant expression

for:

--- a/gcc/testsuite/gfortran.dg/pr100950.f90
+++ b/gcc/testsuite/gfortran.dg/pr100950.f90
@@ -15,2 +15,3 @@ program p
       character(len=:), allocatable :: str
+     character(len=:), allocatable :: str2(:)
    end type t_
@@ -24,2 +25,4 @@ program p
    integer,      parameter :: l6 = len (r(1)%str (3:4))
+  integer,      parameter :: l7 = len (r(1)%str2(1)(3:4))
+  integer,      parameter :: l8 = len (r(1)%str2(:)(3:4))


which feels odd.

>>> The updated patch regtests fine.  OK?
>> Looks good to me except for the caveats.
> Regtested again.
[...]
> Well, there's already
>   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101735

I have added the example to the PR.

>> For deferred length, I have no strong opinion; [...]
> Actually, this is now an important point.  If we really want
> to allow to handle substrings of deferred length strings
> in constant expressions, the new patch would be fine,
I think handling len=: substrings is fine.

In principle, LGTM – except I wonder what we do about the
len(r(1)%str(1)(3:4));
I think we really do handle most code available and I would like to
close this
topic – but still it feels a bit odd to leave this bit out.

I was also wondering whether we should check that the
compile-time simplification works – i.e. use -fdump-tree-original for this;
I attached a patch for this.

Thanks,

Tobias

-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
Harald Anlauf Aug. 19, 2021, 7:11 p.m. UTC | #13
Hi Tobias,

> I am inclined to say that the Intel compiler has a bug by not
> accepting it – but as written before, I regard sub-string length
> (esp. with const expr) inquiries as an odd corner case which
> is unlikely to occur in real-world code.

ok.

> Still does not work – or rather: ...%t(:)(3:4) [i.e. substring with array section]
> and ...%str(3:4) [i.e. substring of deferred-length scalar] both do work
> but if one combines the two (→ ...%str2(:)(3:4), i.e. substring of deferred-length
> array section), it does not:
> 
> Array ‘r’ at (1) is a variable, which does not reduce to a constant expression
> 
> for:
> 
> --- a/gcc/testsuite/gfortran.dg/pr100950.f90
> +++ b/gcc/testsuite/gfortran.dg/pr100950.f90
> @@ -15,2 +15,3 @@ program p
>        character(len=:), allocatable :: str
> +     character(len=:), allocatable :: str2(:)
>     end type t_
> @@ -24,2 +25,4 @@ program p
>     integer,      parameter :: l6 = len (r(1)%str (3:4))
> +  integer,      parameter :: l7 = len (r(1)%str2(1)(3:4))
> +  integer,      parameter :: l8 = len (r(1)%str2(:)(3:4))
> 
> 
> which feels odd.

I agree.  I have revised the code slightly to accept substrings
of deferred-length.  Your suggested variants now work correctly.

> In principle, LGTM – except I wonder what we do about the
> len(r(1)%str(1)(3:4));
> I think we really do handle most code available and I would like to
> close this
> topic – but still it feels a bit odd to leave this bit out.

That is handle now as discussed, see attached final patch.
Regtested again.

> I was also wondering whether we should check that the
> compile-time simplification works – i.e. use -fdump-tree-original for this;
> I attached a patch for this.

I added this to the final patch and taken the liberty to push the result
to master as d881460deb1f0bdfc3e8fa2d391a03a9763cbff4.

Thanks for your patience, given the rather extensive review...

Harald
H.J. Lu Aug. 20, 2021, 12:21 a.m. UTC | #14
On Thu, Aug 19, 2021 at 12:12 PM Harald Anlauf via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi Tobias,
>
> > I am inclined to say that the Intel compiler has a bug by not
> > accepting it – but as written before, I regard sub-string length
> > (esp. with const expr) inquiries as an odd corner case which
> > is unlikely to occur in real-world code.
>
> ok.
>
> > Still does not work – or rather: ...%t(:)(3:4) [i.e. substring with array section]
> > and ...%str(3:4) [i.e. substring of deferred-length scalar] both do work
> > but if one combines the two (→ ...%str2(:)(3:4), i.e. substring of deferred-length
> > array section), it does not:
> >
> > Array ‘r’ at (1) is a variable, which does not reduce to a constant expression
> >
> > for:
> >
> > --- a/gcc/testsuite/gfortran.dg/pr100950.f90
> > +++ b/gcc/testsuite/gfortran.dg/pr100950.f90
> > @@ -15,2 +15,3 @@ program p
> >        character(len=:), allocatable :: str
> > +     character(len=:), allocatable :: str2(:)
> >     end type t_
> > @@ -24,2 +25,4 @@ program p
> >     integer,      parameter :: l6 = len (r(1)%str (3:4))
> > +  integer,      parameter :: l7 = len (r(1)%str2(1)(3:4))
> > +  integer,      parameter :: l8 = len (r(1)%str2(:)(3:4))
> >
> >
> > which feels odd.
>
> I agree.  I have revised the code slightly to accept substrings
> of deferred-length.  Your suggested variants now work correctly.
>
> > In principle, LGTM – except I wonder what we do about the
> > len(r(1)%str(1)(3:4));
> > I think we really do handle most code available and I would like to
> > close this
> > topic – but still it feels a bit odd to leave this bit out.
>
> That is handle now as discussed, see attached final patch.
> Regtested again.
>
> > I was also wondering whether we should check that the
> > compile-time simplification works – i.e. use -fdump-tree-original for this;
> > I attached a patch for this.
>
> I added this to the final patch and taken the liberty to push the result
> to master as d881460deb1f0bdfc3e8fa2d391a03a9763cbff4.
>
> Thanks for your patience, given the rather extensive review...
>
> Harald

This may have broken bootstrap on 32-bit hosts:

https://gcc.gnu.org/pipermail/gcc-regression/2021-August/075209.html
Harald Anlauf Aug. 20, 2021, 6:48 a.m. UTC | #15
Hi!

> Gesendet: Freitag, 20. August 2021 um 02:21 Uhr
> Von: "H.J. Lu" <hjl.tools@gmail.com>

> This may have broken bootstrap on 32-bit hosts:
> 
> https://gcc.gnu.org/pipermail/gcc-regression/2021-August/075209.html

I do not understand the error message:

../../src-master/gcc/fortran/simplify.c: In function ‘bool substring_has_constant_len(gfc_expr*)’:
../../src-master/gcc/fortran/simplify.c:4557:22: error: unknown conversion type character ‘l’ in format [-Werror=format=]
 4557 |           gfc_error ("Substring start index (" HOST_WIDE_INT_PRINT_DEC
      |                      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 4558 |                      ") at %L below 1",
      |                      ~~~~~~~~~~~~~~~~~
../../src-master/gcc/fortran/simplify.c:4557:22: error: format ‘%L’ expects argument of type ‘locus*’, but argument 2 has type ‘long long int’ [-Werror=format=]
 4557 |           gfc_error ("Substring start index (" HOST_WIDE_INT_PRINT_DEC
      |                      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 4558 |                      ") at %L below 1",
      |                      ~~~~~~~~~~~~~~~~~
 4559 |                      istart, &ref->u.ss.start->where);
      |                      ~~~~~~
      |                      |
      |                      long long int
../../src-master/gcc/fortran/simplify.c:4557:22: error: too many arguments for format [-Werror=format-extra-args]

Is there an issue with HOST_WIDE_INT_PRINT_DEC on 32-bit hosts?
What is the right way to print a HOST_WIDE_INT?

It works on 64-bit without any warning.

Harald
Jakub Jelinek Aug. 20, 2021, 9:16 a.m. UTC | #16
On Fri, Aug 20, 2021 at 08:48:57AM +0200, Harald Anlauf via Gcc-patches wrote:
> Hi!
> 
> > Gesendet: Freitag, 20. August 2021 um 02:21 Uhr
> > Von: "H.J. Lu" <hjl.tools@gmail.com>
> 
> > This may have broken bootstrap on 32-bit hosts:
> > 
> > https://gcc.gnu.org/pipermail/gcc-regression/2021-August/075209.html
> 
> I do not understand the error message:
> 
> ../../src-master/gcc/fortran/simplify.c: In function ‘bool substring_has_constant_len(gfc_expr*)’:
> ../../src-master/gcc/fortran/simplify.c:4557:22: error: unknown conversion type character ‘l’ in format [-Werror=format=]
>  4557 |           gfc_error ("Substring start index (" HOST_WIDE_INT_PRINT_DEC
>       |                      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>  4558 |                      ") at %L below 1",
>       |                      ~~~~~~~~~~~~~~~~~
> ../../src-master/gcc/fortran/simplify.c:4557:22: error: format ‘%L’ expects argument of type ‘locus*’, but argument 2 has type ‘long long int’ [-Werror=format=]
>  4557 |           gfc_error ("Substring start index (" HOST_WIDE_INT_PRINT_DEC
>       |                      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>  4558 |                      ") at %L below 1",
>       |                      ~~~~~~~~~~~~~~~~~
>  4559 |                      istart, &ref->u.ss.start->where);
>       |                      ~~~~~~
>       |                      |
>       |                      long long int
> ../../src-master/gcc/fortran/simplify.c:4557:22: error: too many arguments for format [-Werror=format-extra-args]
> 
> Is there an issue with HOST_WIDE_INT_PRINT_DEC on 32-bit hosts?
> What is the right way to print a HOST_WIDE_INT?
> 
> It works on 64-bit without any warning.

gfc_error etc. aren't *printf family, it has its own format specifier
handling (e.g. it handles %L and %C), and it is even different
from the error/warning handling in middle-end/c-family FEs/backends.

HOST_WIDE_INT_PRINT_DEC is wrong in the above spot for 2 reasons:
1) gfc_error etc. argument is automatically marked for translation
   and translated, having a macro in there that expands to various things
   depending on host makes it impossible to mark for translation and
   a lottery for translation.
2) hwint.h defines:
#define HOST_LONG_FORMAT "l"
#define HOST_LONG_LONG_FORMAT "ll"
#if INT64_T_IS_LONG
# define GCC_PRI64 HOST_LONG_FORMAT
#else
# define GCC_PRI64 HOST_LONG_LONG_FORMAT
#endif
#define PRId64 GCC_PRI64 "d"
#define HOST_WIDE_INT_PRINT_DEC "%" PRId64
   but xm-mingw32.h overrides
#define HOST_LONG_LONG_FORMAT "I64"
   so effectively HOST_WIDE_INT_PRINT_DEC is "%ld", "%lld" or "%I64d"
   Now, gfc_error does handle %d or %ld, but doesn't handle %lld nor
   %I64d, so even if the 1) issue didn't exist, this explains why
   it works only on some hosts (e.g. x86_64-linux where %ld is used).
   But e.g. on i686-linux or many other hosts it is %lld which
   gfc_error doesn't handle and on Windows that %I64d.

Now, the non-Fortran FE diagnostic code actually has %wd for this (w
modifier like l modifier), which takes HOST_WIDE_INT/unsigned HOST_WIDE_INT
argument and prints it.

So, either you get through the hops to support that, unfortunately it isn't
just adding support for that in fortran/error.c (error_print) and some
helper functions, which wouldn't be that hard, just add 'w' next to 'l'
handling, TYPE_* for that and union member etc., but one needs to modify
c-family/c-format.c too to register the modifier so that gcc doesn't warn
about it and knows the proper argument type etc.

The other much easier but uglier option is to use a temporary buffer:
  char buffer[21];
  sprintf (buffer, HOST_WIDE_INT_PRINT_DEC, hwint_val);
  gfc_error ("... %s ...", ... buffer ...);
This works, as it uses the host sprintf i.e. *printf family for which
HOST_WIDE_INT_PRINT_DEC macro is designed.

	Jakub
Tobias Burnus Aug. 20, 2021, 9:29 a.m. UTC | #17
On 20.08.21 02:21, H.J. Lu wrote:
> This may have broken bootstrap on 32-bit hosts:
> https://gcc.gnu.org/pipermail/gcc-regression/2021-August/075209.html
The latter has:
> ../../src-master/gcc/fortran/simplify.c:4557:22: error: unknown conversion type character ‘l’ in format [-Werror=format=]
>   4557 |           gfc_error ("Substring start index (" HOST_WIDE_INT_PRINT_DEC
>        |                      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

HOST_WIDE_INT_PRINT_DEC is:
   #define HOST_WIDE_INT_PRINT_DEC "%" PRId64
and the latter is something like:
   "ld"  or "lld"

I fail to see why that shouldn't work – and also building with:

  CC="gcc-10 -m32 -fno-lto -O2" CXX="g++-10 -m32 -fno-lto -O2" .../configure --prefix=... --disable-multilib --disable-libstdcxx-pch --enable-multiarch --enable-languages=c,c++,fortran --disable-lto

did succeed.


Looking at the format checking:
   gfortran.h:void gfc_error (const char *, ...) ATTRIBUTE_GCC_GFC(1,2);
with
   gfortran.h:#define ATTRIBUTE_GCC_GFC(m, n) __attribute__ ((__format__ (__gcc_gfc__, m, n))) ATTRIBUTE_NONNULL(m)

I do see that the C/C++ part (which also uses
HOST_WIDE_INT_PRINT_DEC) have some special code
for HWI, which is used for via
    init_dynamic_diag_info
contains support for hwi not for gfc_{error,warning} via
    init_dynamic_gfc_info

I did wonder whether that causes the differences (→ attached patch).
On the other hand, %ld and %lld are pretty standard – and the
comment in the function talks about %w – which is not used
(except in spec files and in 'go').

Hence: No idea what's the problem or goes wrong.

Maybe the patch is still useful – at least it gives an
error when HWI is neither long nor long long ...
(Build with and without that patch with the options
shown aboved (and 'error:' in stage 1, 2, 3 plus
the usual bootstrap on x86-64.)

Comments?

Tobias

-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
diff mbox series

Patch

diff --git a/gcc/fortran/simplify.c b/gcc/fortran/simplify.c
index c27b47aa98f..016ec259518 100644
--- a/gcc/fortran/simplify.c
+++ b/gcc/fortran/simplify.c
@@ -4512,6 +4512,60 @@  gfc_simplify_leadz (gfc_expr *e)
 }


+/* Check for constant length of a substring.  */
+
+static bool
+substring_has_constant_len (gfc_expr *e)
+{
+  ptrdiff_t istart, iend;
+  size_t length;
+  bool equal_length = false;
+
+  if (e->ts.type != BT_CHARACTER
+      || !(e->ref && e->ref->type == REF_SUBSTRING)
+      || !e->ref->u.ss.start
+      || e->ref->u.ss.start->expr_type != EXPR_CONSTANT
+      || !e->ref->u.ss.end
+      || e->ref->u.ss.end->expr_type != EXPR_CONSTANT
+      || !e->ref->u.ss.length
+      || !e->ref->u.ss.length->length
+      || e->ref->u.ss.length->length->expr_type != EXPR_CONSTANT)
+    return false;
+
+  /* Basic checks on substring starting and ending indices.  */
+  if (!gfc_resolve_substring (e->ref, &equal_length))
+    return false;
+
+  istart = gfc_mpz_get_hwi (e->ref->u.ss.start->value.integer);
+  iend = gfc_mpz_get_hwi (e->ref->u.ss.end->value.integer);
+  length = gfc_mpz_get_hwi (e->ref->u.ss.length->length->value.integer);
+
+  if (istart <= iend)
+    {
+      if (istart < 1)
+	{
+	  gfc_error ("Substring start index (%ld) at %L below 1",
+		     (long) istart, &e->ref->u.ss.start->where);
+	  return false;
+	}
+      if (iend > (ssize_t) length)
+	{
+	  gfc_error ("Substring end index (%ld) at %L exceeds string "
+		     "length", (long) iend, &e->ref->u.ss.end->where);
+	  return false;
+	}
+      length = iend - istart + 1;
+    }
+  else
+    length = 0;
+
+  /* Fix substring length.  */
+  e->value.character.length = length;
+
+  return true;
+}
+
+
 gfc_expr *
 gfc_simplify_len (gfc_expr *e, gfc_expr *kind)
 {
@@ -4547,6 +4601,13 @@  gfc_simplify_len (gfc_expr *e, gfc_expr *kind)
        of the unlimited polymorphic entity.  To get the _len component the last
        _data ref needs to be stripped and a ref to the _len component added.  */
     return gfc_get_len_component (e->symtree->n.sym->assoc->target, k);
+  else if (substring_has_constant_len (e))
+    {
+      result = gfc_get_constant_expr (BT_INTEGER, k, &e->where);
+      mpz_set_si (result->value.integer,
+		  e->value.character.length);
+      return range_check (result, "LEN");
+    }
   else
     return NULL;
 }
diff --git a/gcc/testsuite/gfortran.dg/pr100950.f90 b/gcc/testsuite/gfortran.dg/pr100950.f90
new file mode 100644
index 00000000000..f06db45b0b4
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr100950.f90
@@ -0,0 +1,18 @@ 
+! { dg-do run }
+! PR fortran/100950 - ICE in output_constructor_regular_field, at varasm.c:5514
+
+program p
+  character(8), parameter :: u = "123"
+  character(8)            :: x = "", s
+  character(2)            :: w(2) = [character(len(x(3:4))) :: 'a','b' ]
+  character(*), parameter :: y(*) = [character(len(u(3:4))) :: 'a','b' ]
+  character(*), parameter :: z(*) = [character(len(x(3:4))) :: 'a','b' ]
+  if (len (y) /= 2) stop 1
+  if (len (z) /= 2) stop 2
+  if (any (w /= y)) stop 3
+  if (len ([character(len(u(3:4))) :: 'a','b' ]) /= 2)  stop 4
+  if (len ([character(len(x(3:4))) :: 'a','b' ]) /= 2)  stop 5
+  if (any ([character(len(x(3:4))) :: 'a','b' ]  /= y)) stop 6
+  write(s,*) [character(len(x(3:4))) :: 'a','b' ]
+  if (s /= " a b    ") stop 7
+end