Message ID | trinity-558ac6fa-645e-4b69-bd92-ed980a8db626-1623274785526@3c-app-gmx-bs42 |
---|---|
State | New |
Headers | show |
Series | PR fortran/100950 - ICE in output_constructor_regular_field, at varasm.c:5514 | expand |
On Wed, 9 Jun 2021 23:39:45 +0200 Harald Anlauf via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > diff --git a/gcc/fortran/simplify.c b/gcc/fortran/simplify.c > index c27b47aa98f..016ec259518 100644 > --- a/gcc/fortran/simplify.c > +++ b/gcc/fortran/simplify.c > @@ -4512,6 +4512,60 @@ gfc_simplify_leadz (gfc_expr *e) > } > > > +/* Check for constant length of a substring. */ > + > +static bool > +substring_has_constant_len (gfc_expr *e) > +{ > + ptrdiff_t istart, iend; > + size_t length; > + bool equal_length = false; > + > + if (e->ts.type != BT_CHARACTER > + || !(e->ref && e->ref->type == REF_SUBSTRING) iff we ever can get here with e->ref == NULL then the below will not work too well. If so then maybe if (e->ts.type != BT_CHARACTER || ! e->ref || e->ref->type != REF_SUBSTRING ? > + || !e->ref->u.ss.start > + || e->ref->u.ss.start->expr_type != EXPR_CONSTANT > + || !e->ref->u.ss.end > + || e->ref->u.ss.end->expr_type != EXPR_CONSTANT > + || !e->ref->u.ss.length > + || !e->ref->u.ss.length->length > + || e->ref->u.ss.length->length->expr_type != EXPR_CONSTANT) > + return false; > + > + /* Basic checks on substring starting and ending indices. */ > + if (!gfc_resolve_substring (e->ref, &equal_length)) > + return false; > + > + istart = gfc_mpz_get_hwi (e->ref->u.ss.start->value.integer); > + iend = gfc_mpz_get_hwi (e->ref->u.ss.end->value.integer); > + length = gfc_mpz_get_hwi (e->ref->u.ss.length->length->value.integer); > + > + if (istart <= iend) > + { > + if (istart < 1) > + { > + gfc_error ("Substring start index (%ld) at %L below 1", > + (long) istart, &e->ref->u.ss.start->where); > + return false; > + } > + if (iend > (ssize_t) length) > + { > + gfc_error ("Substring end index (%ld) at %L exceeds string " > + "length", (long) iend, &e->ref->u.ss.end->where); > + return false; > + } > + length = iend - istart + 1; > + } > + else > + length = 0; > + > + /* Fix substring length. */ > + e->value.character.length = length; > + > + return true; > +} > + > + > gfc_expr * > gfc_simplify_len (gfc_expr *e, gfc_expr *kind) > { > @@ -4547,6 +4601,13 @@ gfc_simplify_len (gfc_expr *e, gfc_expr *kind) > of the unlimited polymorphic entity. To get the _len component the last > _data ref needs to be stripped and a ref to the _len component added. */ > return gfc_get_len_component (e->symtree->n.sym->assoc->target, k); > + else if (substring_has_constant_len (e)) > + { > + result = gfc_get_constant_expr (BT_INTEGER, k, &e->where); > + mpz_set_si (result->value.integer, > + e->value.character.length); I think the mpz_set_si args above fit on one line. btw.. there's a commentary typo in add_init_expr_to_sym(): s/skeep/skip/ thanks, > + return range_check (result, "LEN"); > + } > else > return NULL; > } > diff --git a/gcc/testsuite/gfortran.dg/pr100950.f90 b/gcc/testsuite/gfortran.dg/pr100950.f90 > new file mode 100644 > index 00000000000..f06db45b0b4 > --- /dev/null > +++ b/gcc/testsuite/gfortran.dg/pr100950.f90 > @@ -0,0 +1,18 @@ > +! { dg-do run } > +! PR fortran/100950 - ICE in output_constructor_regular_field, at varasm.c:5514 > + > +program p > + character(8), parameter :: u = "123" > + character(8) :: x = "", s > + character(2) :: w(2) = [character(len(x(3:4))) :: 'a','b' ] > + character(*), parameter :: y(*) = [character(len(u(3:4))) :: 'a','b' ] > + character(*), parameter :: z(*) = [character(len(x(3:4))) :: 'a','b' ] > + if (len (y) /= 2) stop 1 > + if (len (z) /= 2) stop 2 > + if (any (w /= y)) stop 3 > + if (len ([character(len(u(3:4))) :: 'a','b' ]) /= 2) stop 4 > + if (len ([character(len(x(3:4))) :: 'a','b' ]) /= 2) stop 5 > + if (any ([character(len(x(3:4))) :: 'a','b' ] /= y)) stop 6 > + write(s,*) [character(len(x(3:4))) :: 'a','b' ] > + if (s /= " a b ") stop 7 > +end
On Thu, 10 Jun 2021 12:24:35 +0200 Bernhard Reutner-Fischer <rep.dot.nop@gmail.com> wrote: > On Wed, 9 Jun 2021 23:39:45 +0200 > Harald Anlauf via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > +/* Check for constant length of a substring. */ > > + > > +static bool > > +substring_has_constant_len (gfc_expr *e) > > +{ > > + ptrdiff_t istart, iend; > > + size_t length; > > + bool equal_length = false; > > + > > + if (e->ts.type != BT_CHARACTER > > + || !(e->ref && e->ref->type == REF_SUBSTRING) > > iff we ever can get here with e->ref == NULL then the below will not > work too well. If so then maybe > if (e->ts.type != BT_CHARACTER > || ! e->ref > || e->ref->type != REF_SUBSTRING > > ? Not sure what i was reading, maybe i read || instead of && in the braced condition. Your initial version works equally well of course although it's obviously harder to parse for at least some :) thanks,
Hi Bernhard, > > +static bool > > +substring_has_constant_len (gfc_expr *e) > > +{ > > + ptrdiff_t istart, iend; > > + size_t length; > > + bool equal_length = false; > > + > > + if (e->ts.type != BT_CHARACTER > > + || !(e->ref && e->ref->type == REF_SUBSTRING) > > iff we ever can get here with e->ref == NULL then the below will not > work too well. If so then maybe > if (e->ts.type != BT_CHARACTER > || ! e->ref > || e->ref->type != REF_SUBSTRING > > ? as you already realized, the logic was fine, but probably less readable than your version. I've changed the code accordingly. > > + else if (substring_has_constant_len (e)) > > + { > > + result = gfc_get_constant_expr (BT_INTEGER, k, &e->where); > > + mpz_set_si (result->value.integer, > > + e->value.character.length); > > I think the mpz_set_si args above fit on one line. That's true. Since this block is exactly the same as for constant strings, which is handled in the first condition, I've thought some more and am convinced now that these two can be fused. Done now. I've also added two cornercases to the testcase, and regtested again. > btw.. there's a commentary typo in add_init_expr_to_sym(): > s/skeep/skip/ That is a completely unrelated issue in a different file, right? Thanks for your constructive comments! Harald
On 10 June 2021 20:52:12 CEST, Harald Anlauf <anlauf@gmx.de> wrote: >> I think the mpz_set_si args above fit on one line. > >That's true. > >Since this block is exactly the same as for constant strings, >which is handled in the first condition, I've thought some more >and am convinced now that these two can be fused. Done now. Thanks. Btw.. I know that we cast hwi to long int often and use %ld but think there is a HOST_WIDE_INT_PRINT_DEC format macro to print a hwi. >I've also added two cornercases to the testcase, and regtested again. > >> btw.. there's a commentary typo in add_init_expr_to_sym(): >> s/skeep/skip/ > >That is a completely unrelated issue in a different file, right? Completely unrelated, yes. > >Thanks for your constructive comments! thanks for the patch!
*PING* > Gesendet: Mittwoch, 09. Juni 2021 um 23:39 Uhr > Von: "Harald Anlauf" <anlauf@gmx.de> > An: "fortran" <fortran@gcc.gnu.org>, "gcc-patches" <gcc-patches@gcc.gnu.org> > Betreff: [PATCH] PR fortran/100950 - ICE in output_constructor_regular_field, at varasm.c:5514 > > Dear Fortranners, > > we should be able to simplify the length of a substring with known > constant bounds. The attached patch adds this. > > Regtested on x86_64-pc-linux-gnu. > > OK for mainline? Since this should be rather safe, to at least 11-branch? > > Thanks, > Harald > > > Fortran - simplify length of substring with constant bounds > > gcc/fortran/ChangeLog: > > PR fortran/100950 > * simplify.c (substring_has_constant_len): New. > (gfc_simplify_len): Handle case of substrings with constant > bounds. > > gcc/testsuite/ChangeLog: > > PR fortran/100950 > * gfortran.dg/pr100950.f90: New test. > >
Hi Harald, sorry for being way behind my review duties :-( On 10.06.21 20:52, Harald Anlauf via Fortran wrote: > +static bool > +substring_has_constant_len (gfc_expr *e) > +{ > + ptrdiff_t istart, iend; > + size_t length; > + bool equal_length = false; > + > + if (e->ts.type != BT_CHARACTER > + || !e->ref > + || e->ref->type != REF_SUBSTRING Is there a reason why you do not handle: type t character(len=5) :: str1 character(len=:), allocatable :: str2 end type type(t) :: x allocate(x%str2, source="abd") if (len (x%str)) /= 1) ... if (len (x%str2(1:2) /= 2) ... etc. Namely: Search the last_ref = expr->ref->next->next ...? and then check that lastref? * * * Slightly unrelated: I think the following does not violate F2018's R916 / C923 – but is rejected, namely: R916 type-param-inquiry is designator % type-param-name the latter is 'len' or 'kind' for intrinsic types. And: R901 designator is ... or substring But character(len=5) :: str print *, str(1:3)%len end fails with 2 | print *, str(1:3)%len | 1 Error: Syntax error in PRINT statement at (1) Assuming you don't want to handle it, can you open a new PR? Thanks! * * * That's in so far related to your patch as last_ref would then be the last ref before ref->next == NULL or before ref->next->type == REF_INQUIRY > + istart = gfc_mpz_get_hwi (e->ref->u.ss.start->value.integer); > + iend = gfc_mpz_get_hwi (e->ref->u.ss.end->value.integer); > + length = gfc_mpz_get_hwi (e->ref->u.ss.length->length->value.integer); > + > + if (istart <= iend) > + { > + if (istart < 1) > + { > + gfc_error ("Substring start index (%ld) at %L below 1", > + (long) istart, &e->ref->u.ss.start->where); As mentioned by Bernhard, you could use HOST_WIDE_INT_PRINT_DEC. (It probably only matters on Windows which uses long == int = 32bit for strings longer than INT_MAX.) Thanks, Tobias ----------------- Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank Thürauf
Hi Tobias, > On 10.06.21 20:52, Harald Anlauf via Fortran wrote: > > +static bool > > +substring_has_constant_len (gfc_expr *e) > > +{ > > + ptrdiff_t istart, iend; > > + size_t length; > > + bool equal_length = false; > > + > > + if (e->ts.type != BT_CHARACTER > > + || !e->ref > > + || e->ref->type != REF_SUBSTRING > > Is there a reason why you do not handle: > > type t > character(len=5) :: str1 > character(len=:), allocatable :: str2 > end type > type(t) :: x > > allocate(x%str2, source="abd") > if (len (x%str)) /= 1) ... > if (len (x%str2(1:2) /= 2) ... > etc. > > Namely: Search the last_ref = expr->ref->next->next ...? > and then check that lastref? I was assuming that the argument passed to LEN() is already the ultimate component for the case of substrings, and I was unable to find a case which requires implementing that iteration. The cases you provided do not seem to apply here: - derived type component str1, which is a string of given length, poses no problem. I added a case to the testcase, see attached updated patch. - derived type component str2 has deferred length. I do not see that the simplification can be applied here, as the allocation could lead to str2 being too short, and we do not want to simplify invalid code, such as: type t character(len=:), allocatable :: str2 end type type(t) :: x allocate(x%str2, source="z") if (len (x%str2(1:2)) /= 2) stop 1 end If we want this to be catchable by bounds checking, we need to punt at simplification of this. The updated patch skips deferred strings. > * * * > > Slightly unrelated: I think the following does not violate > F2018's R916 / C923 – but is rejected, namely: > R916 type-param-inquiry is designator % type-param-name > the latter is 'len' or 'kind' for intrinsic types. And: > R901 designator is ... > or substring > But > > character(len=5) :: str > print *, str(1:3)%len > end > > fails with > > 2 | print *, str(1:3)%len > | 1 > Error: Syntax error in PRINT statement at (1) > > > Assuming you don't want to handle it, can you open a new PR? > Thanks! Good point. I'd rather open a separate PR for this, though. > > + istart = gfc_mpz_get_hwi (e->ref->u.ss.start->value.integer); > > + iend = gfc_mpz_get_hwi (e->ref->u.ss.end->value.integer); > > + length = gfc_mpz_get_hwi (e->ref->u.ss.length->length->value.integer); > > + > > + if (istart <= iend) > > + { > > + if (istart < 1) > > + { > > + gfc_error ("Substring start index (%ld) at %L below 1", > > + (long) istart, &e->ref->u.ss.start->where); > > As mentioned by Bernhard, you could use HOST_WIDE_INT_PRINT_DEC. > > (It probably only matters on Windows which uses long == int = 32bit for > strings longer than INT_MAX.) I am not familiar enough with Windows. What is HOST_WIDE_INT on that system? (As compared to e.g. size_t, ptrdiff_t). The (slightly) updated patch regtests fine. Thanks, Harald
Here's now my third attempt to fix this PR, taking into account the comments by Tobias and Bernhard. > > On 10.06.21 20:52, Harald Anlauf via Fortran wrote: > > > +static bool > > > +substring_has_constant_len (gfc_expr *e) > > > +{ > > > + ptrdiff_t istart, iend; > > > + size_t length; > > > + bool equal_length = false; > > > + > > > + if (e->ts.type != BT_CHARACTER > > > + || !e->ref > > > + || e->ref->type != REF_SUBSTRING > > > > Is there a reason why you do not handle: > > > > type t > > character(len=5) :: str1 > > character(len=:), allocatable :: str2 > > end type > > type(t) :: x > > > > allocate(x%str2, source="abd") > > if (len (x%str)) /= 1) ... > > if (len (x%str2(1:2) /= 2) ... > > etc. > > > > Namely: Search the last_ref = expr->ref->next->next ...? > > and then check that lastref? The mentioned search is now implemented. Note, however, that gfc_simplify_len still won't handle neither deferred strings nor their substrings. I think there is nothing to simplify at compile time here. Otherwise there would be a conflict/inconsistency with type parameter inquiry, see F2018:9.4.5(2): "A deferred type parameter of a pointer that is not associated or of an unallocated allocatable variable shall not be inquired about." > > * * * > > > > Slightly unrelated: I think the following does not violate > > F2018's R916 / C923 – but is rejected, namely: > > R916 type-param-inquiry is designator % type-param-name > > the latter is 'len' or 'kind' for intrinsic types. And: > > R901 designator is ... > > or substring > > But > > > > character(len=5) :: str > > print *, str(1:3)%len > > end > > > > fails with > > > > 2 | print *, str(1:3)%len > > | 1 > > Error: Syntax error in PRINT statement at (1) > > > > > > Assuming you don't want to handle it, can you open a new PR? > > Thanks! I tried to look into this, but there appear to be several unrelated issues requiring a separate treatment. I therefore opened: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101735 > > > + istart = gfc_mpz_get_hwi (e->ref->u.ss.start->value.integer); > > > + iend = gfc_mpz_get_hwi (e->ref->u.ss.end->value.integer); > > > + length = gfc_mpz_get_hwi (e->ref->u.ss.length->length->value.integer); > > > + > > > + if (istart <= iend) > > > + { > > > + if (istart < 1) > > > + { > > > + gfc_error ("Substring start index (%ld) at %L below 1", > > > + (long) istart, &e->ref->u.ss.start->where); > > > > As mentioned by Bernhard, you could use HOST_WIDE_INT_PRINT_DEC. > > > > (It probably only matters on Windows which uses long == int = 32bit for > > strings longer than INT_MAX.) Done. The updated patch regtests fine. OK? Thanks, Harald Fortran - simplify length of substring with constant bounds gcc/fortran/ChangeLog: PR fortran/100950 * simplify.c (substring_has_constant_len): New. (gfc_simplify_len): Handle case of substrings with constant bounds. gcc/testsuite/ChangeLog: PR fortran/100950 * gfortran.dg/pr100950.f90: New test.
*Ping* > Gesendet: Dienstag, 03. August 2021 um 23:17 Uhr > Von: "Harald Anlauf" <anlauf@gmx.de> > An: "Harald Anlauf" <anlauf@gmx.de> > Cc: "Tobias Burnus" <Tobias_Burnus@mentor.com>, "Bernhard Reutner-Fischer" <rep.dot.nop@gmail.com>, "Harald Anlauf via Gcc-patches" <gcc-patches@gcc.gnu.org>, "fortran" <fortran@gcc.gnu.org> > Betreff: Re: [PATCH] PR fortran/100950 - ICE in output_constructor_regular_field, at varasm.c:5514 > > Here's now my third attempt to fix this PR, taking into account > the comments by Tobias and Bernhard. > > > > On 10.06.21 20:52, Harald Anlauf via Fortran wrote: > > > > +static bool > > > > +substring_has_constant_len (gfc_expr *e) > > > > +{ > > > > + ptrdiff_t istart, iend; > > > > + size_t length; > > > > + bool equal_length = false; > > > > + > > > > + if (e->ts.type != BT_CHARACTER > > > > + || !e->ref > > > > + || e->ref->type != REF_SUBSTRING > > > > > > Is there a reason why you do not handle: > > > > > > type t > > > character(len=5) :: str1 > > > character(len=:), allocatable :: str2 > > > end type > > > type(t) :: x > > > > > > allocate(x%str2, source="abd") > > > if (len (x%str)) /= 1) ... > > > if (len (x%str2(1:2) /= 2) ... > > > etc. > > > > > > Namely: Search the last_ref = expr->ref->next->next ...? > > > and then check that lastref? > > The mentioned search is now implemented. > > Note, however, that gfc_simplify_len still won't handle neither > deferred strings nor their substrings. > > I think there is nothing to simplify at compile time here. Otherwise > there would be a conflict/inconsistency with type parameter inquiry, > see F2018:9.4.5(2): > > "A deferred type parameter of a pointer that is not associated or > of an unallocated allocatable variable shall not be inquired about." > > > > * * * > > > > > > Slightly unrelated: I think the following does not violate > > > F2018's R916 / C923 – but is rejected, namely: > > > R916 type-param-inquiry is designator % type-param-name > > > the latter is 'len' or 'kind' for intrinsic types. And: > > > R901 designator is ... > > > or substring > > > But > > > > > > character(len=5) :: str > > > print *, str(1:3)%len > > > end > > > > > > fails with > > > > > > 2 | print *, str(1:3)%len > > > | 1 > > > Error: Syntax error in PRINT statement at (1) > > > > > > > > > Assuming you don't want to handle it, can you open a new PR? > > > Thanks! > > I tried to look into this, but there appear to be several unrelated > issues requiring a separate treatment. I therefore opened: > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101735 > > > > > + istart = gfc_mpz_get_hwi (e->ref->u.ss.start->value.integer); > > > > + iend = gfc_mpz_get_hwi (e->ref->u.ss.end->value.integer); > > > > + length = gfc_mpz_get_hwi (e->ref->u.ss.length->length->value.integer); > > > > + > > > > + if (istart <= iend) > > > > + { > > > > + if (istart < 1) > > > > + { > > > > + gfc_error ("Substring start index (%ld) at %L below 1", > > > > + (long) istart, &e->ref->u.ss.start->where); > > > > > > As mentioned by Bernhard, you could use HOST_WIDE_INT_PRINT_DEC. > > > > > > (It probably only matters on Windows which uses long == int = 32bit for > > > strings longer than INT_MAX.) > > Done. > > The updated patch regtests fine. OK? > > Thanks, > Harald > > > Fortran - simplify length of substring with constant bounds > > gcc/fortran/ChangeLog: > > PR fortran/100950 > * simplify.c (substring_has_constant_len): New. > (gfc_simplify_len): Handle case of substrings with constant > bounds. > > gcc/testsuite/ChangeLog: > > PR fortran/100950 > * gfortran.dg/pr100950.f90: New test. > >
Hi Harald, sorry for the belated review. On 03.08.21 23:17, Harald Anlauf wrote: >>> allocate(x%str2, source="abd") >>> if (len (x%str)) /= 1) ... >>> if (len (x%str2(1:2) /= 2) ... >>> etc. >>> >>> Namely: Search the last_ref = expr->ref->next->next ...? >>> and then check that lastref? > The mentioned search is now implemented. > > Note, however, that gfc_simplify_len still won't handle neither > deferred strings nor their substrings. > > I think there is nothing to simplify at compile time here. Obviously, nonsubstrings cannot be simplified but I do not see why len(str(1:2)) cannot or should not be simplified. (Not that I regard substring length inquiries as that common.) Regarding: > Otherwise > there would be a conflict/inconsistency with type parameter inquiry, > see F2018:9.4.5(2): > > "A deferred type parameter of a pointer that is not associated or > of an unallocated allocatable variable shall not be inquired about." That's a requirement for the user not to do: character(len=:), allocatable :: str n = len(str) ! unallocated 'str' which makes sense as 'len(str)' is not meaningful in this case and the compiler might even access invalid memory in this case and definitely undefined memory. However, there is no reason why the user cannot do: if (allocated(str)) then n = len(str) m = len(str(5:8)) end if and why the compiler cannot replace the latter by 'm = 4'. But, IMHO, the latter remark does _not_ imply that we shall/must/have to accept code like: if (allocated(str)) then block integer, parameter :: n = len(str(:5)) end block endif > +static bool > +substring_has_constant_len (gfc_expr *e) > +{ > + gfc_ref *ref; > + HOST_WIDE_INT istart, iend, length; > + bool equal_length = false; > + > + if (e->ts.type != BT_CHARACTER || e->ts.deferred) > + return false; cf. above. > + > + for (ref = e->ref; ref; ref = ref->next) > + if (ref->type != REF_COMPONENT) > + break; > + > + if (!ref > + || ref->type != REF_SUBSTRING > + || !ref->u.ss.start With the caveat from above that len(<substring>) is rather special, there is no real reason why: str_array(:)(4:5) cannot be handled. (→ len = 2). [I do note that "len(str(:5))" appears in your examples, hence, I assume that ss.start is set in that case to '1'.] > The updated patch regtests fine. OK? Looks good to me except for the caveats. In particular: * * * I think at least the array case should be handled. On current mainline (i.e. without your patch), I get (-fdump-tree-original) x = 5; // Wrong - should be 1 y = 1; // OK for character(len=5) :: str2(4) type t character(len=5) :: str(4) end type t type(t) :: var integer :: x, y x = len(var%str(:)(1:1)) y = len(str2(:)(1:1)) end I don't know what's the result with your patch - but if it is 'x = 5', it must be fixed. * * * And while the following works x = var%str(:)%len ! ok, yields 5 y = str2(:)%len ! ok, yields 5 the following is wrongly rejected: x = var%str(:)(1:1)%len ! Bogus: 'Invalid character in name' y = str2(:)(1:1)%len ! Bogus: 'Invalid character in name' (likewise with '%kind') (As "SUBSTRING % LEN", it also appears in the '16.9.99 INDEX', but '9.4.5 Type parameter inquiry's 'R916 type-param-inquiry' is the official reference.) If you don't want to spend time on this subpart - you could fill a PR. * * * For deferred length, I have no strong opinion; in any case, the upper substring bound > stringlen check cannot be done in that case (at compile time). I think I slightly prefer doing the optimization – but as is is a special case and has some caveats (must be allocated, upper bound check not possible, ...) I also see reasons not to do it. Hence, it also can remain as in your patch. Thanks, Tobias > Fortran - simplify length of substring with constant bounds > > gcc/fortran/ChangeLog: > > PR fortran/100950 > * simplify.c (substring_has_constant_len): New. > (gfc_simplify_len): Handle case of substrings with constant > bounds. > > gcc/testsuite/ChangeLog: > > PR fortran/100950 > * gfortran.dg/pr100950.f90: New test. ----------------- Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
Hi Tobias, > Gesendet: Mittwoch, 18. August 2021 um 12:22 Uhr > Von: "Tobias Burnus" <tobias@codesourcery.com> > > Note, however, that gfc_simplify_len still won't handle neither > > deferred strings nor their substrings. > > > > I think there is nothing to simplify at compile time here. > > Obviously, nonsubstrings cannot be simplified but I do not > see why len(str(1:2)) cannot or should not be simplified. > > (Not that I regard substring length inquiries as that common.) well, here's an example that Intel rejects: type u character(8) :: s(4) character(:), allocatable :: str end type u type(u) :: q integer, parameter :: k2 = len (q% s(:)(3:4)) ! OK integer, parameter :: k3 = len (q% str (3:4)) ! Rejected by Intel print *, k2 if (k2 /= 2) stop 2 print *, k3 if (k3 /= 2) stop 3 end pr100950-ww.f90(7): error #6814: When using this inquiry function, the length of this object cannot be evaluated to a constant. [LEN] integer, parameter :: k3 = len (q% str (3:4)) ! Rejected by Intel -----------------------------^ pr100950-ww.f90(7): error #7169: Bad initialization expression. [LEN] integer, parameter :: k3 = len (q% str (3:4)) ! Rejected by Intel -----------------------------^ Of course we could accept it regardless what others do. I have therefore removed the check for deferred length in the attached patch (but read on). > However, there is no reason why the user cannot do: > if (allocated(str)) then > n = len(str) > m = len(str(5:8)) > end if > and why the compiler cannot replace the latter by 'm = 4'. Maybe you can enlighten me here. I thought one of the purposes of gfc_simplify_len is to evaluate constant expressions. Of course the length is constant, provided bounds are respected. Otherwise the result is, well, ... (It will then eveluate at runtime, which I thought was fine). > But, IMHO, the latter remark does _not_ imply that we > shall/must/have to accept code like: > > if (allocated(str)) then > block > integer, parameter :: n = len(str(:5)) > end block > endif So shall we not simplify here (and thus reject it)? This is important! Or silently simplify and accept it? > With the caveat from above that len(<substring>) is rather special, > there is no real reason why: str_array(:)(4:5) cannot be handled. > (→ len = 2). Good point. This is fixed in the revised patch and tested for. > > The updated patch regtests fine. OK? > Looks good to me except for the caveats. Regtested again. > * * * > > And while the following works > > x = var%str(:)%len ! ok, yields 5 > y = str2(:)%len ! ok, yields 5 > > the following is wrongly rejected: > > x = var%str(:)(1:1)%len ! Bogus: 'Invalid character in name' > y = str2(:)(1:1)%len ! Bogus: 'Invalid character in name' > > (likewise with '%kind') > > (As "SUBSTRING % LEN", it also appears in the '16.9.99 INDEX', > but '9.4.5 Type parameter inquiry's 'R916 type-param-inquiry' > is the official reference.) > > If you don't want to spend time on this subpart - you could > fill a PR. Well, there's already https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101735 which is a much better suited place for discussion. > * * * > > For deferred length, I have no strong opinion; in > any case, the upper substring bound > stringlen check > cannot be done in that case (at compile time). I think > I slightly prefer doing the optimization – but as is > is a special case and has some caveats (must be allocated, > upper bound check not possible, ...) I also see reasons > not to do it. Hence, it also can remain as in your patch. Actually, this is now an important point. If we really want to allow to handle substrings of deferred length strings in constant expressions, the new patch would be fine, otherwise I would have to reintroduce the hunk + if (e->ts.deferred) + return NULL; and adjust the testcase. Your choice. See above. Of course there may be more corner cases which I did not think of... Thanks, Harald
Hi Harald, On 18.08.21 23:01, Harald Anlauf wrote: >> Von: "Tobias Burnus"<tobias@codesourcery.com> >>> Note, however, that gfc_simplify_len still won't handle neither >>> deferred strings nor their substrings. >> Obviously, nonsubstrings cannot be simplified but I do not >> see why len(str(1:2)) cannot or should not be simplified. > well, here's an example that Intel rejects: > ... > character(:), allocatable :: str > end type u > type(u) :: q > ... > integer, parameter :: k3 = len (q% str (3:4)) ! Rejected by Intel > > pr100950-ww.f90(7): error #6814: When using this inquiry function, the length of this object cannot be evaluated to a constant. [LEN] I think the question is really how to interpret "10.1.12 Constant expression" "(4) a specification inquiry where each designator or argument is ... (b) a variable whose properties inquired about are not (i) assumed, (ii) deferred, or (iii) defined by an expression that is not a constant expression," And as the substring bounds are constant expressions, one can argue that (4)(b) is fulfilled as (i)–(iii) do not apply. I am inclined to say that the Intel compiler has a bug by not accepting it – but as written before, I regard sub-string length (esp. with const expr) inquiries as an odd corner case which is unlikely to occur in real-world code. >> However, there is no reason why the user cannot do [...] > Maybe you can enlighten me here. [...] I can't as I did not understand your question. However ... >> But, IMHO, the latter remark does_not_ imply that we >> shall/must/have to accept code like: >> >> if (allocated(str)) then >> block >> integer, parameter :: n = len(str(:5)) >> end block >> endif > So shall we not simplify here (and thus reject it)? > This is important! Or silently simplify and accept it? I tried to draw the line between simplification – to generate better code – and 'constant expression' handling (accept where permitted, diagnose non-standard-conforming code). — However, nearly but not quite always: if it can be simplified to a constant the standard also regards it as constant expression. I think in for the purpose of the examples in this email thread, we do not need to distinguish the two. — And can always simplify deferred-length substrings where the substring bounds are const expressions (or the lower-bound is absent and, hence, 1). >> With the caveat from above that len(<substring>) is rather special, >> there is no real reason why: str_array(:)(4:5) cannot be handled. >> (→ len = 2). > Good point. This is fixed in the revised patch and tested for. Still does not work – or rather: ...%t(:)(3:4) [i.e. substring with array section] and ...%str(3:4) [i.e. substring of deferred-length scalar] both do work but if one combines the two (→ ...%str2(:)(3:4), i.e. substring of deferred-length array section), it does not: Array ‘r’ at (1) is a variable, which does not reduce to a constant expression for: --- a/gcc/testsuite/gfortran.dg/pr100950.f90 +++ b/gcc/testsuite/gfortran.dg/pr100950.f90 @@ -15,2 +15,3 @@ program p character(len=:), allocatable :: str + character(len=:), allocatable :: str2(:) end type t_ @@ -24,2 +25,4 @@ program p integer, parameter :: l6 = len (r(1)%str (3:4)) + integer, parameter :: l7 = len (r(1)%str2(1)(3:4)) + integer, parameter :: l8 = len (r(1)%str2(:)(3:4)) which feels odd. >>> The updated patch regtests fine. OK? >> Looks good to me except for the caveats. > Regtested again. [...] > Well, there's already > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101735 I have added the example to the PR. >> For deferred length, I have no strong opinion; [...] > Actually, this is now an important point. If we really want > to allow to handle substrings of deferred length strings > in constant expressions, the new patch would be fine, I think handling len=: substrings is fine. In principle, LGTM – except I wonder what we do about the len(r(1)%str(1)(3:4)); I think we really do handle most code available and I would like to close this topic – but still it feels a bit odd to leave this bit out. I was also wondering whether we should check that the compile-time simplification works – i.e. use -fdump-tree-original for this; I attached a patch for this. Thanks, Tobias ----------------- Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
Hi Tobias, > I am inclined to say that the Intel compiler has a bug by not > accepting it – but as written before, I regard sub-string length > (esp. with const expr) inquiries as an odd corner case which > is unlikely to occur in real-world code. ok. > Still does not work – or rather: ...%t(:)(3:4) [i.e. substring with array section] > and ...%str(3:4) [i.e. substring of deferred-length scalar] both do work > but if one combines the two (→ ...%str2(:)(3:4), i.e. substring of deferred-length > array section), it does not: > > Array ‘r’ at (1) is a variable, which does not reduce to a constant expression > > for: > > --- a/gcc/testsuite/gfortran.dg/pr100950.f90 > +++ b/gcc/testsuite/gfortran.dg/pr100950.f90 > @@ -15,2 +15,3 @@ program p > character(len=:), allocatable :: str > + character(len=:), allocatable :: str2(:) > end type t_ > @@ -24,2 +25,4 @@ program p > integer, parameter :: l6 = len (r(1)%str (3:4)) > + integer, parameter :: l7 = len (r(1)%str2(1)(3:4)) > + integer, parameter :: l8 = len (r(1)%str2(:)(3:4)) > > > which feels odd. I agree. I have revised the code slightly to accept substrings of deferred-length. Your suggested variants now work correctly. > In principle, LGTM – except I wonder what we do about the > len(r(1)%str(1)(3:4)); > I think we really do handle most code available and I would like to > close this > topic – but still it feels a bit odd to leave this bit out. That is handle now as discussed, see attached final patch. Regtested again. > I was also wondering whether we should check that the > compile-time simplification works – i.e. use -fdump-tree-original for this; > I attached a patch for this. I added this to the final patch and taken the liberty to push the result to master as d881460deb1f0bdfc3e8fa2d391a03a9763cbff4. Thanks for your patience, given the rather extensive review... Harald
On Thu, Aug 19, 2021 at 12:12 PM Harald Anlauf via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > Hi Tobias, > > > I am inclined to say that the Intel compiler has a bug by not > > accepting it – but as written before, I regard sub-string length > > (esp. with const expr) inquiries as an odd corner case which > > is unlikely to occur in real-world code. > > ok. > > > Still does not work – or rather: ...%t(:)(3:4) [i.e. substring with array section] > > and ...%str(3:4) [i.e. substring of deferred-length scalar] both do work > > but if one combines the two (→ ...%str2(:)(3:4), i.e. substring of deferred-length > > array section), it does not: > > > > Array ‘r’ at (1) is a variable, which does not reduce to a constant expression > > > > for: > > > > --- a/gcc/testsuite/gfortran.dg/pr100950.f90 > > +++ b/gcc/testsuite/gfortran.dg/pr100950.f90 > > @@ -15,2 +15,3 @@ program p > > character(len=:), allocatable :: str > > + character(len=:), allocatable :: str2(:) > > end type t_ > > @@ -24,2 +25,4 @@ program p > > integer, parameter :: l6 = len (r(1)%str (3:4)) > > + integer, parameter :: l7 = len (r(1)%str2(1)(3:4)) > > + integer, parameter :: l8 = len (r(1)%str2(:)(3:4)) > > > > > > which feels odd. > > I agree. I have revised the code slightly to accept substrings > of deferred-length. Your suggested variants now work correctly. > > > In principle, LGTM – except I wonder what we do about the > > len(r(1)%str(1)(3:4)); > > I think we really do handle most code available and I would like to > > close this > > topic – but still it feels a bit odd to leave this bit out. > > That is handle now as discussed, see attached final patch. > Regtested again. > > > I was also wondering whether we should check that the > > compile-time simplification works – i.e. use -fdump-tree-original for this; > > I attached a patch for this. > > I added this to the final patch and taken the liberty to push the result > to master as d881460deb1f0bdfc3e8fa2d391a03a9763cbff4. > > Thanks for your patience, given the rather extensive review... > > Harald This may have broken bootstrap on 32-bit hosts: https://gcc.gnu.org/pipermail/gcc-regression/2021-August/075209.html
Hi! > Gesendet: Freitag, 20. August 2021 um 02:21 Uhr > Von: "H.J. Lu" <hjl.tools@gmail.com> > This may have broken bootstrap on 32-bit hosts: > > https://gcc.gnu.org/pipermail/gcc-regression/2021-August/075209.html I do not understand the error message: ../../src-master/gcc/fortran/simplify.c: In function ‘bool substring_has_constant_len(gfc_expr*)’: ../../src-master/gcc/fortran/simplify.c:4557:22: error: unknown conversion type character ‘l’ in format [-Werror=format=] 4557 | gfc_error ("Substring start index (" HOST_WIDE_INT_PRINT_DEC | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 4558 | ") at %L below 1", | ~~~~~~~~~~~~~~~~~ ../../src-master/gcc/fortran/simplify.c:4557:22: error: format ‘%L’ expects argument of type ‘locus*’, but argument 2 has type ‘long long int’ [-Werror=format=] 4557 | gfc_error ("Substring start index (" HOST_WIDE_INT_PRINT_DEC | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 4558 | ") at %L below 1", | ~~~~~~~~~~~~~~~~~ 4559 | istart, &ref->u.ss.start->where); | ~~~~~~ | | | long long int ../../src-master/gcc/fortran/simplify.c:4557:22: error: too many arguments for format [-Werror=format-extra-args] Is there an issue with HOST_WIDE_INT_PRINT_DEC on 32-bit hosts? What is the right way to print a HOST_WIDE_INT? It works on 64-bit without any warning. Harald
On Fri, Aug 20, 2021 at 08:48:57AM +0200, Harald Anlauf via Gcc-patches wrote: > Hi! > > > Gesendet: Freitag, 20. August 2021 um 02:21 Uhr > > Von: "H.J. Lu" <hjl.tools@gmail.com> > > > This may have broken bootstrap on 32-bit hosts: > > > > https://gcc.gnu.org/pipermail/gcc-regression/2021-August/075209.html > > I do not understand the error message: > > ../../src-master/gcc/fortran/simplify.c: In function ‘bool substring_has_constant_len(gfc_expr*)’: > ../../src-master/gcc/fortran/simplify.c:4557:22: error: unknown conversion type character ‘l’ in format [-Werror=format=] > 4557 | gfc_error ("Substring start index (" HOST_WIDE_INT_PRINT_DEC > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > 4558 | ") at %L below 1", > | ~~~~~~~~~~~~~~~~~ > ../../src-master/gcc/fortran/simplify.c:4557:22: error: format ‘%L’ expects argument of type ‘locus*’, but argument 2 has type ‘long long int’ [-Werror=format=] > 4557 | gfc_error ("Substring start index (" HOST_WIDE_INT_PRINT_DEC > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > 4558 | ") at %L below 1", > | ~~~~~~~~~~~~~~~~~ > 4559 | istart, &ref->u.ss.start->where); > | ~~~~~~ > | | > | long long int > ../../src-master/gcc/fortran/simplify.c:4557:22: error: too many arguments for format [-Werror=format-extra-args] > > Is there an issue with HOST_WIDE_INT_PRINT_DEC on 32-bit hosts? > What is the right way to print a HOST_WIDE_INT? > > It works on 64-bit without any warning. gfc_error etc. aren't *printf family, it has its own format specifier handling (e.g. it handles %L and %C), and it is even different from the error/warning handling in middle-end/c-family FEs/backends. HOST_WIDE_INT_PRINT_DEC is wrong in the above spot for 2 reasons: 1) gfc_error etc. argument is automatically marked for translation and translated, having a macro in there that expands to various things depending on host makes it impossible to mark for translation and a lottery for translation. 2) hwint.h defines: #define HOST_LONG_FORMAT "l" #define HOST_LONG_LONG_FORMAT "ll" #if INT64_T_IS_LONG # define GCC_PRI64 HOST_LONG_FORMAT #else # define GCC_PRI64 HOST_LONG_LONG_FORMAT #endif #define PRId64 GCC_PRI64 "d" #define HOST_WIDE_INT_PRINT_DEC "%" PRId64 but xm-mingw32.h overrides #define HOST_LONG_LONG_FORMAT "I64" so effectively HOST_WIDE_INT_PRINT_DEC is "%ld", "%lld" or "%I64d" Now, gfc_error does handle %d or %ld, but doesn't handle %lld nor %I64d, so even if the 1) issue didn't exist, this explains why it works only on some hosts (e.g. x86_64-linux where %ld is used). But e.g. on i686-linux or many other hosts it is %lld which gfc_error doesn't handle and on Windows that %I64d. Now, the non-Fortran FE diagnostic code actually has %wd for this (w modifier like l modifier), which takes HOST_WIDE_INT/unsigned HOST_WIDE_INT argument and prints it. So, either you get through the hops to support that, unfortunately it isn't just adding support for that in fortran/error.c (error_print) and some helper functions, which wouldn't be that hard, just add 'w' next to 'l' handling, TYPE_* for that and union member etc., but one needs to modify c-family/c-format.c too to register the modifier so that gcc doesn't warn about it and knows the proper argument type etc. The other much easier but uglier option is to use a temporary buffer: char buffer[21]; sprintf (buffer, HOST_WIDE_INT_PRINT_DEC, hwint_val); gfc_error ("... %s ...", ... buffer ...); This works, as it uses the host sprintf i.e. *printf family for which HOST_WIDE_INT_PRINT_DEC macro is designed. Jakub
On 20.08.21 02:21, H.J. Lu wrote: > This may have broken bootstrap on 32-bit hosts: > https://gcc.gnu.org/pipermail/gcc-regression/2021-August/075209.html The latter has: > ../../src-master/gcc/fortran/simplify.c:4557:22: error: unknown conversion type character ‘l’ in format [-Werror=format=] > 4557 | gfc_error ("Substring start index (" HOST_WIDE_INT_PRINT_DEC > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HOST_WIDE_INT_PRINT_DEC is: #define HOST_WIDE_INT_PRINT_DEC "%" PRId64 and the latter is something like: "ld" or "lld" I fail to see why that shouldn't work – and also building with: CC="gcc-10 -m32 -fno-lto -O2" CXX="g++-10 -m32 -fno-lto -O2" .../configure --prefix=... --disable-multilib --disable-libstdcxx-pch --enable-multiarch --enable-languages=c,c++,fortran --disable-lto did succeed. Looking at the format checking: gfortran.h:void gfc_error (const char *, ...) ATTRIBUTE_GCC_GFC(1,2); with gfortran.h:#define ATTRIBUTE_GCC_GFC(m, n) __attribute__ ((__format__ (__gcc_gfc__, m, n))) ATTRIBUTE_NONNULL(m) I do see that the C/C++ part (which also uses HOST_WIDE_INT_PRINT_DEC) have some special code for HWI, which is used for via init_dynamic_diag_info contains support for hwi not for gfc_{error,warning} via init_dynamic_gfc_info I did wonder whether that causes the differences (→ attached patch). On the other hand, %ld and %lld are pretty standard – and the comment in the function talks about %w – which is not used (except in spec files and in 'go'). Hence: No idea what's the problem or goes wrong. Maybe the patch is still useful – at least it gives an error when HWI is neither long nor long long ... (Build with and without that patch with the options shown aboved (and 'error:' in stage 1, 2, 3 plus the usual bootstrap on x86-64.) Comments? Tobias ----------------- Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
diff --git a/gcc/fortran/simplify.c b/gcc/fortran/simplify.c index c27b47aa98f..016ec259518 100644 --- a/gcc/fortran/simplify.c +++ b/gcc/fortran/simplify.c @@ -4512,6 +4512,60 @@ gfc_simplify_leadz (gfc_expr *e) } +/* Check for constant length of a substring. */ + +static bool +substring_has_constant_len (gfc_expr *e) +{ + ptrdiff_t istart, iend; + size_t length; + bool equal_length = false; + + if (e->ts.type != BT_CHARACTER + || !(e->ref && e->ref->type == REF_SUBSTRING) + || !e->ref->u.ss.start + || e->ref->u.ss.start->expr_type != EXPR_CONSTANT + || !e->ref->u.ss.end + || e->ref->u.ss.end->expr_type != EXPR_CONSTANT + || !e->ref->u.ss.length + || !e->ref->u.ss.length->length + || e->ref->u.ss.length->length->expr_type != EXPR_CONSTANT) + return false; + + /* Basic checks on substring starting and ending indices. */ + if (!gfc_resolve_substring (e->ref, &equal_length)) + return false; + + istart = gfc_mpz_get_hwi (e->ref->u.ss.start->value.integer); + iend = gfc_mpz_get_hwi (e->ref->u.ss.end->value.integer); + length = gfc_mpz_get_hwi (e->ref->u.ss.length->length->value.integer); + + if (istart <= iend) + { + if (istart < 1) + { + gfc_error ("Substring start index (%ld) at %L below 1", + (long) istart, &e->ref->u.ss.start->where); + return false; + } + if (iend > (ssize_t) length) + { + gfc_error ("Substring end index (%ld) at %L exceeds string " + "length", (long) iend, &e->ref->u.ss.end->where); + return false; + } + length = iend - istart + 1; + } + else + length = 0; + + /* Fix substring length. */ + e->value.character.length = length; + + return true; +} + + gfc_expr * gfc_simplify_len (gfc_expr *e, gfc_expr *kind) { @@ -4547,6 +4601,13 @@ gfc_simplify_len (gfc_expr *e, gfc_expr *kind) of the unlimited polymorphic entity. To get the _len component the last _data ref needs to be stripped and a ref to the _len component added. */ return gfc_get_len_component (e->symtree->n.sym->assoc->target, k); + else if (substring_has_constant_len (e)) + { + result = gfc_get_constant_expr (BT_INTEGER, k, &e->where); + mpz_set_si (result->value.integer, + e->value.character.length); + return range_check (result, "LEN"); + } else return NULL; } diff --git a/gcc/testsuite/gfortran.dg/pr100950.f90 b/gcc/testsuite/gfortran.dg/pr100950.f90 new file mode 100644 index 00000000000..f06db45b0b4 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr100950.f90 @@ -0,0 +1,18 @@ +! { dg-do run } +! PR fortran/100950 - ICE in output_constructor_regular_field, at varasm.c:5514 + +program p + character(8), parameter :: u = "123" + character(8) :: x = "", s + character(2) :: w(2) = [character(len(x(3:4))) :: 'a','b' ] + character(*), parameter :: y(*) = [character(len(u(3:4))) :: 'a','b' ] + character(*), parameter :: z(*) = [character(len(x(3:4))) :: 'a','b' ] + if (len (y) /= 2) stop 1 + if (len (z) /= 2) stop 2 + if (any (w /= y)) stop 3 + if (len ([character(len(u(3:4))) :: 'a','b' ]) /= 2) stop 4 + if (len ([character(len(x(3:4))) :: 'a','b' ]) /= 2) stop 5 + if (any ([character(len(x(3:4))) :: 'a','b' ] /= y)) stop 6 + write(s,*) [character(len(x(3:4))) :: 'a','b' ] + if (s /= " a b ") stop 7 +end