Message ID | f896ae17-d74b-4906-b78d-33fd8f20636d@hexco.de |
---|---|
State | New |
Headers | show |
Series | [v2] replace atoi with strtol in varasm.cc (decode_reg_name_and_count) [PR114540] | expand |
On Sat, Nov 23, 2024 at 07:26:55PM +0100, Heiko Eißfeldt wrote: > --- a/gcc/varasm.cc > +++ b/gcc/varasm.cc > @@ -993,9 +993,12 @@ decode_reg_name_and_count (const char *asmspec, int *pnregs) > break; > if (asmspec[0] != 0 && i < 0) > { > - i = atoi (asmspec); > - if (i < FIRST_PSEUDO_REGISTER && i >= 0 && reg_names[i][0]) > - return i; > + char *pend{}; Why the {} strtol will write to pend no matter what, so it doesn't need to be cleared. > + errno = 0; > + long j = strtol (asmspec, &pend, 10); > + if (errno != ERANGE && j <= INT_MAX > + && j < FIRST_PSEUDO_REGISTER && j >= 0 && reg_names[j][0]) > + return j; If all conditions don't fit on a single line, all should be written on separate lines, so if (errno != ERANGE && ... && ...) I'm not sure what is the point of the j <= INT_MAX check, FIRST_PSEUDO_REGISTER will surely be smaller than INT_MAX on all targets. Furthermore, when strtol is used, I wonder if that for (i = strlen (asmspec) - 1; i >= 0; i--) if (! ISDIGIT (asmspec[i])) break; isn't just a waste of time, if it wouldn't be enough to test that ISDIGIT (asmspec[0]) and *pend == '\0' after the strtol call (if not, it should fallthrough to the name matching rather than just return -2. So - for (i = strlen (asmspec) - 1; i >= 0; i--) - if (! ISDIGIT (asmspec[i])) - break; - if (asmspec[0] != 0 && i < 0) - { - i = atoi (asmspec); - if (i < FIRST_PSEUDO_REGISTER && i >= 0 && reg_names[i][0]) - return i; - else - return -2; - } + if (ISDIGIT (asmspec[0])) + { + char *pend; + errno = 0; + unsigned long j = strtoul (asmspec, &pend, 10); + if (*pend == '\0') + { + if (errno != ERANGE + && j < FIRST_PSEUDO_REGISTER + && reg_names[j][0]) + return j; + else + return -2; + } + } Jakub
Thanks for your feedback! On 11/23/24 11:02 PM, Jakub Jelinek wrote: > On Sat, Nov 23, 2024 at 07:26:55PM +0100, Heiko Eißfeldt wrote: >> --- a/gcc/varasm.cc >> +++ b/gcc/varasm.cc >> @@ -993,9 +993,12 @@ decode_reg_name_and_count (const char *asmspec, int *pnregs) >> break; >> if (asmspec[0] != 0 && i < 0) >> { >> - i = atoi (asmspec); >> - if (i < FIRST_PSEUDO_REGISTER && i >= 0 && reg_names[i][0]) >> - return i; >> + char *pend{}; > Why the {} strtol will write to pend no matter what, so it doesn't need to > be cleared. > >> + errno = 0; >> + long j = strtol (asmspec, &pend, 10); >> + if (errno != ERANGE && j <= INT_MAX >> + && j < FIRST_PSEUDO_REGISTER && j >= 0 && reg_names[j][0]) >> + return j; > If all conditions don't fit on a single line, all should be written > on separate lines, so > if (errno != ERANGE > && ... > && ...) > I'm not sure what is the point of the j <= INT_MAX check, > FIRST_PSEUDO_REGISTER will surely be smaller than INT_MAX on all targets. > > Furthermore, when strtol is used, I wonder if that > for (i = strlen (asmspec) - 1; i >= 0; i--) > if (! ISDIGIT (asmspec[i])) > break; > isn't just a waste of time, if it wouldn't be enough to test that > ISDIGIT (asmspec[0]) and *pend == '\0' after the strtol call (if not, > it should fallthrough to the name matching rather than just return -2. I agree, but was hesitant to change more than the offending atoi() part. > So > - for (i = strlen (asmspec) - 1; i >= 0; i--) > - if (! ISDIGIT (asmspec[i])) > - break; > - if (asmspec[0] != 0 && i < 0) > - { > - i = atoi (asmspec); > - if (i < FIRST_PSEUDO_REGISTER && i >= 0 && reg_names[i][0]) > - return i; > - else > - return -2; > - } > + if (ISDIGIT (asmspec[0])) > + { > + char *pend; > + errno = 0; > + unsigned long j = strtoul (asmspec, &pend, 10); > + if (*pend == '\0') > + { I would then add a + static_assert ( FIRST_PSEUDO_REGISTER <= INT_MAX ); here to 'document' the underlying assumption. I hope that is ok. Would it make sense to have a nullptr check for asmspec at the beginning, too? > + if (errno != ERANGE > + && j < FIRST_PSEUDO_REGISTER > + && reg_names[j][0]) > + return j; > + else > + return -2; > + } > + } > > Jakub > diff --git a/gcc/varasm.cc b/gcc/varasm.cc index dd67dd441c0..3e3e4bc5b42 100644 --- a/gcc/varasm.cc +++ b/gcc/varasm.cc @@ -988,16 +988,21 @@ decode_reg_name_and_count (const char *asmspec, int *pnregs) asmspec = strip_reg_name (asmspec); /* Allow a decimal number as a "register name". */ - for (i = strlen (asmspec) - 1; i >= 0; i--) - if (! ISDIGIT (asmspec[i])) - break; - if (asmspec[0] != 0 && i < 0) + if (ISDIGIT (asmspec[0])) { - i = atoi (asmspec); - if (i < FIRST_PSEUDO_REGISTER && i >= 0 && reg_names[i][0]) - return i; - else - return -2; + char *pend; + errno = 0; + unsigned long j = strtoul (asmspec, &pend, 10); + if (*pend == '\0') + { + static_assert( FIRST_PSEUDO_REGISTER <= INT_MAX ); + if (errno != ERANGE + && j < FIRST_PSEUDO_REGISTER + && reg_names[j][0]) + return j; + else + return -2; + } } for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
On Tue, Nov 26, 2024 at 06:51:40AM +0100, Heiko Eißfeldt wrote: > Would it make sense to have a nullptr check for asmspec at the > beginning, too? No. There is already if (asmspec != 0) around it which guarantees it and strip_reg_name will never return NULL. > diff --git a/gcc/varasm.cc b/gcc/varasm.cc > index dd67dd441c0..3e3e4bc5b42 100644 > --- a/gcc/varasm.cc > +++ b/gcc/varasm.cc > @@ -988,16 +988,21 @@ decode_reg_name_and_count (const char *asmspec, int *pnregs) > asmspec = strip_reg_name (asmspec); > > /* Allow a decimal number as a "register name". */ > - for (i = strlen (asmspec) - 1; i >= 0; i--) > - if (! ISDIGIT (asmspec[i])) > - break; > - if (asmspec[0] != 0 && i < 0) > + if (ISDIGIT (asmspec[0])) > { > - i = atoi (asmspec); > - if (i < FIRST_PSEUDO_REGISTER && i >= 0 && reg_names[i][0]) > - return i; > - else > - return -2; > + char *pend; > + errno = 0; > + unsigned long j = strtoul (asmspec, &pend, 10); > + if (*pend == '\0') > + { > + static_assert( FIRST_PSEUDO_REGISTER <= INT_MAX ); This is wrongly formatted and valid only in C++17 and later, while GCC is currently written in C++14. So it needs to be static_assert (FIRST_PSEUDO_REGISTER <= INT_MAX, ""); > + if (errno != ERANGE > + && j < FIRST_PSEUDO_REGISTER > + && reg_names[j][0]) > + return j; > + else > + return -2; > + } > } > > for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) Ok for trunk with that change. Jakub
Thanks for your patience! On 11/26/24 8:36 AM, Jakub Jelinek wrote: > This is wrongly formatted and valid only in C++17 and later, while > GCC is currently written in C++14. > So it needs to be > static_assert (FIRST_PSEUDO_REGISTER <= INT_MAX, ""); done. Heiko diff --git a/gcc/varasm.cc b/gcc/varasm.cc index dd67dd441c0..9d5299533b5 100644 --- a/gcc/varasm.cc +++ b/gcc/varasm.cc @@ -988,16 +988,21 @@ decode_reg_name_and_count (const char *asmspec, int *pnregs) asmspec = strip_reg_name (asmspec); /* Allow a decimal number as a "register name". */ - for (i = strlen (asmspec) - 1; i >= 0; i--) - if (! ISDIGIT (asmspec[i])) - break; - if (asmspec[0] != 0 && i < 0) + if (ISDIGIT (asmspec[0])) { - i = atoi (asmspec); - if (i < FIRST_PSEUDO_REGISTER && i >= 0 && reg_names[i][0]) - return i; - else - return -2; + char *pend; + errno = 0; + unsigned long j = strtoul (asmspec, &pend, 10); + if (*pend == '\0') + { + static_assert (FIRST_PSEUDO_REGISTER <= INT_MAX, ""); + if (errno != ERANGE + && j < FIRST_PSEUDO_REGISTER + && reg_names[j][0]) + return j; + else + return -2; + } } for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
On Tue, Nov 26, 2024 at 06:19:46PM +0100, Heiko Eißfeldt wrote: > Thanks for your patience! > > On 11/26/24 8:36 AM, Jakub Jelinek wrote: > > This is wrongly formatted and valid only in C++17 and later, while > > GCC is currently written in C++14. > > So it needs to be > > static_assert (FIRST_PSEUDO_REGISTER <= INT_MAX, ""); > done. There are three important parts missing. I don't see you in MAINTAINERS file, so you need to decide if you assign copyright to FSF or submit this under DCO. See https://gcc.gnu.org/contribute.html#legal for more details (if you already have FSF assignment on file, somebody would need to check that, I don't have access to that). Another part is that a ChangeLog entry is missing (as documented in https://gcc.gnu.org/codingconventions.html#ChangeLogs). For your patch, that would be something like: PR middle-end/114540 * varasm.cc (decode_reg_name_and_count): Use strtoul instead of atoi and simplify verification the whole asmspec contains just decimal digits. (if testcase is added, empty line and * testcase_filename_relative_to_gcc/testsuite/: New test. added too; and if you go with DCO, followed by empty line and Signed-Off-By: line). The third part is missing testcase, for PR like this it should be probably in gcc/testsuite/gcc.dg/pr114540.c, have /* PR middle-end/114540 */ /* { dg-do compile } */ and add /* { dg-error "whatever error to expect" } */ directives to the lines on which errors will appear after the patch. Jakub
> There are three important parts missing. > > I don't see you in MAINTAINERS file, so you need to decide if you assign > copyright to FSF or submit this under DCO. I wonder if it is ok to add myself to MAINTAINERS file? > Seehttps://gcc.gnu.org/contribute.html#legal for more details (if you > already have FSF assignment on file, somebody would need to check that, > I don't have access to that). I wanted to assign copyright to FSF but got no answer from assign@fsf.org (nor from assign@gnu.org) for my request of the required documents yet. So for now I go with DCO. > Another part is that a ChangeLog entry is missing (as documented in > https://gcc.gnu.org/codingconventions.html#ChangeLogs). > For your patch, that would be something like: > PR middle-end/114540 > * varasm.cc (decode_reg_name_and_count): Use strtoul instead of atoi > and simplify verification the whole asmspec contains just decimal > digits. > (if testcase is added, empty line and > * testcase_filename_relative_to_gcc/testsuite/: New test. > added too; and if you go with DCO, followed by empty line and Signed-Off-By: line). > > The third part is missing testcase, for PR like this it should be probably > in gcc/testsuite/gcc.dg/pr114540.c, have > /* PR middle-end/114540 */ > /* { dg-do compile } */ > > and add /* { dg-error "whatever error to expect" } */ > directives to the lines on which errors will appear after the patch. Done. diff --git a/ChangeLog b/ChangeLog index a0b48aa45cb..9b8dac9fc91 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2024-11-28 Heiko Eißfeldt <heiko@hexco.de> + + * MAINTAINERS: Add myself to write after approval. + 2024-11-25 Sandra Loosemore <sloosemore@baylibre.com> * MAINTAINERS: Remove references to nios2. diff --git a/MAINTAINERS b/MAINTAINERS index 26455d1cabf..0fab8de6ac7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -453,6 +453,7 @@ David Edelsohn dje <dje.gcc@gmail.com> Bernd Edlinger edlinger <bernd.edlinger@hotmail.de> Phil Edwards pme <pme@gcc.gnu.org> Mark Eggleston markeggleston <mark.eggleston@codethink.co.uk> +Heiko Eißfeldt - <heiko@hexco.de> Steve Ellcey sje <sellcey@caviumnetworks.com> Ben Elliston bje <config-patches@gnu.org> Mohan Embar membar <gnustuff@thisiscool.com> diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 2be87f2079c..53295e34434 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +2024-11-28 Heiko Eißfeldt <heiko@hexco.de> + + PR middle-end/114540 + * varasm.cc (decode_reg_name_and_count): Use strtoul instead of atoi + and simplify verification the whole asmspec contains just decimal + digits. + + * gcc.dg/pr114540.c: New test. + + Signed-off-by: Heiko Eißfeldt <heiko@hexco.de> + Co-authored-by: Jakub Jelinek <jakub@redhat.com> + + 2024-11-27 Uros Bizjak <ubizjak@gmail.com> PR target/36503 diff --git a/gcc/testsuite/gcc.dg/pr114540.c b/gcc/testsuite/gcc.dg/pr114540.c new file mode 100644 index 00000000000..6d1aadc443f --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr114540.c @@ -0,0 +1,25 @@ +/* PR middle-end/114540 */ +/* { dg-do compile } */ +/* { dg-options "" } */ + +void f() +{ + asm("":::"2147483648"); /* INT_MAX + 1 { dg-error "unknown register name" } */ + asm("":::"4294967296"); /* UINT_MAX + 1 { dg-error "unknown register name" } */ + asm("":::"18446744073709551616"); /* ULONG_MAX + 1 { dg-error "unknown register name" } */ + asm("":::"9223372036854775808"); /* LONG_MAX + 1 { dg-error "unknown register name" } */ + asm("":::"9223372036854775807"); /* LONG_MAX { dg-error "unknown register name" } */ + asm("":::"2147483647"); /* INT_MAX { dg-error "unknown register name" } */ + asm("":::"2147483647&"); /* INT_MAX + garbage char { dg-error "unknown register name" } */ + asm("":::"0"); /* real reg */ + + register int a asm("2147483648"); /* INT_MAX + 1 { dg-error "invalid register name for" } */ + register int b asm("4294967296"); /* UINT_MAX + 1 { dg-error "invalid register name for" } */ + register int c asm("18446744073709551616"); /* ULONG_MAX + 1 { dg-error "invalid register name for" } */ + register int d asm("9223372036854775808"); /* LONG_MAX + 1 { dg-error "invalid register name for" } */ + register int e asm("9223372036854775807"); /* LONG_MAX { dg-error "invalid register name for" } */ + register int f asm("2147483647"); /* INT_MAX { dg-error "invalid register name for" } */ + register int g asm("2147483647&"); /* INT_MAX + garbage char { dg-error "invalid register name for" } */ + register int h asm("0"); /* real reg */ +} + diff --git a/gcc/varasm.cc b/gcc/varasm.cc index be11123180c..261621a18c7 100644 --- a/gcc/varasm.cc +++ b/gcc/varasm.cc @@ -990,16 +990,21 @@ decode_reg_name_and_count (const char *asmspec, int *pnregs) asmspec = strip_reg_name (asmspec); /* Allow a decimal number as a "register name". */ - for (i = strlen (asmspec) - 1; i >= 0; i--) - if (! ISDIGIT (asmspec[i])) - break; - if (asmspec[0] != 0 && i < 0) + if (ISDIGIT (asmspec[0])) { - i = atoi (asmspec); - if (i < FIRST_PSEUDO_REGISTER && i >= 0 && reg_names[i][0]) - return i; - else - return -2; + char *pend; + errno = 0; + unsigned long j = strtoul (asmspec, &pend, 10); + if (*pend == '\0') + { + static_assert (FIRST_PSEUDO_REGISTER <= INT_MAX, ""); + if (errno != ERANGE + && j < FIRST_PSEUDO_REGISTER + && reg_names[j][0]) + return j; + else + return -2; + } } for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
On Thu, Nov 28, 2024 at 08:15:08PM +0100, Heiko Eißfeldt wrote: > > There are three important parts missing. > > > > I don't see you in MAINTAINERS file, so you need to decide if you assign > > copyright to FSF or submit this under DCO. > I wonder if it is ok to add myself to MAINTAINERS file? No. That happens only if/when one gets account on sourceware, as a first commit under the new rights. And account is given only after a few earlier contributions where somebody commits it for you. > --- a/gcc/ChangeLog > +++ b/gcc/ChangeLog > @@ -1,3 +1,16 @@ > +2024-11-28 Heiko Eißfeldt <heiko@hexco.de> > + > + PR middle-end/114540 > + * varasm.cc (decode_reg_name_and_count): Use strtoul instead of atoi > + and simplify verification the whole asmspec contains just decimal > + digits. > + > + * gcc.dg/pr114540.c: New test. > + > + Signed-off-by: Heiko Eißfeldt <heiko@hexco.de> > + Co-authored-by: Jakub Jelinek <jakub@redhat.com> We don't put the ChangeLog changes into the patch being committed, that would result in patch conflicts all the time, so the ChangeLog entry is written in the plaintext at the end of the commit message instead and a daily script processes commits and populates ChangeLog files. > + asm("":::"2147483647&"); /* INT_MAX + garbage char { dg-error "unknown register name" } */ I chose to use 4& instead of 2147483647& because for 2147483647 it would be rejected anyway, no matter whether there is garbage after it or not. And I've added one case with garbage in between. > + > + register int a asm("2147483648"); /* INT_MAX + 1 { dg-error "invalid register name for" } */ > + register int b asm("4294967296"); /* UINT_MAX + 1 { dg-error "invalid register name for" } */ > + register int c asm("18446744073709551616"); /* ULONG_MAX + 1 { dg-error "invalid register name for" } */ > + register int d asm("9223372036854775808"); /* LONG_MAX + 1 { dg-error "invalid register name for" } */ > + register int e asm("9223372036854775807"); /* LONG_MAX { dg-error "invalid register name for" } */ > + register int f asm("2147483647"); /* INT_MAX { dg-error "invalid register name for" } */ > + register int g asm("2147483647&"); /* INT_MAX + garbage char { dg-error "invalid register name for" } */ > + register int h asm("0"); /* real reg */ This one doesn't really work on all targets, e.g. on hppa-linux I get error: the register specified for ‘h’ is not general enough to be used as a register variable So, I've guarded the 0 cases with #if check for a couple of targets which handle register 0 well. Here is what I've committed in https://gcc.gnu.org/r15-5877 for you: replace atoi with strtoul in varasm.cc (decode_reg_name_and_count) [PR114540] The function uses atoi, which can silently return valid numbers even for some too large numbers in the string. Furthermore, the verification that all the characters in asmspec are decimal digits can be simplified when using strotoul, we can check just the first digit and whether the end pointer points to '\0'. 2024-12-03 Heiko Eißfeldt <heiko@hexco.de> PR middle-end/114540 * varasm.cc (decode_reg_name_and_count): Use strtoul instead of atoi and simplify verification that the whole asmspec contains just decimal digits. * gcc.dg/pr114540.c: New test. Signed-off-by: Heiko Eißfeldt <heiko@hexco.de> Co-authored-by: Jakub Jelinek <jakub@redhat.com> --- gcc/varasm.cc.jj 2024-11-28 11:42:02.289812658 +0100 +++ gcc/varasm.cc 2024-12-02 19:49:40.602841404 +0100 @@ -990,16 +990,21 @@ decode_reg_name_and_count (const char *a asmspec = strip_reg_name (asmspec); /* Allow a decimal number as a "register name". */ - for (i = strlen (asmspec) - 1; i >= 0; i--) - if (! ISDIGIT (asmspec[i])) - break; - if (asmspec[0] != 0 && i < 0) + if (ISDIGIT (asmspec[0])) { - i = atoi (asmspec); - if (i < FIRST_PSEUDO_REGISTER && i >= 0 && reg_names[i][0]) - return i; - else - return -2; + char *pend; + errno = 0; + unsigned long j = strtoul (asmspec, &pend, 10); + if (*pend == '\0') + { + static_assert (FIRST_PSEUDO_REGISTER <= INT_MAX, ""); + if (errno != ERANGE + && j < FIRST_PSEUDO_REGISTER + && reg_names[j][0]) + return j; + else + return -2; + } } for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) --- gcc/testsuite/gcc.dg/pr114540.c.jj 2024-12-02 19:49:40.603841390 +0100 +++ gcc/testsuite/gcc.dg/pr114540.c 2024-12-02 19:58:29.021425513 +0100 @@ -0,0 +1,31 @@ +/* PR middle-end/114540 */ +/* { dg-do compile } */ +/* { dg-options "" } */ + +void +foo () +{ + asm ("" : : : "2147483648"); /* { dg-error "unknown register name" } */ + asm ("" : : : "4294967296"); /* { dg-error "unknown register name" } */ + asm ("" : : : "18446744073709551616"); /* { dg-error "unknown register name" } */ + asm ("" : : : "9223372036854775808"); /* { dg-error "unknown register name" } */ + asm ("" : : : "9223372036854775807"); /* { dg-error "unknown register name" } */ + asm ("" : : : "2147483647"); /* { dg-error "unknown register name" } */ + asm ("" : : : "4&"); /* { dg-error "unknown register name" } */ + asm ("" : : : "1'0"); /* { dg-error "unknown register name" } */ +#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || defined(__s390__) || defined(__aarch64__) || defined(__arm__) + asm ("" : : : "0"); +#endif + + register int a asm("2147483648"); /* { dg-error "invalid register name for" } */ + register int b asm("4294967296"); /* { dg-error "invalid register name for" } */ + register int c asm("18446744073709551616"); /* { dg-error "invalid register name for" } */ + register int d asm("9223372036854775808"); /* { dg-error "invalid register name for" } */ + register int e asm("9223372036854775807"); /* { dg-error "invalid register name for" } */ + register int f asm("2147483647"); /* { dg-error "invalid register name for" } */ + register int g asm("4&"); /* { dg-error "invalid register name for" } */ + register int h asm("1'0"); /* { dg-error "invalid register name for" } */ +#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || defined(__s390__) || defined(__aarch64__) || defined(__arm__) + register int i asm("0"); +#endif +} Jakub
diff --git a/gcc/varasm.cc b/gcc/varasm.cc index acc4b4a0419..ee9df83e0e1 100644 --- a/gcc/varasm.cc +++ b/gcc/varasm.cc @@ -993,9 +993,12 @@ decode_reg_name_and_count (const char *asmspec, int *pnregs) break; if (asmspec[0] != 0 && i < 0) { - i = atoi (asmspec); - if (i < FIRST_PSEUDO_REGISTER && i >= 0 && reg_names[i][0]) - return i; + char *pend{}; + errno = 0; + long j = strtol (asmspec, &pend, 10); + if (errno != ERANGE && j <= INT_MAX + && j < FIRST_PSEUDO_REGISTER && j >= 0 && reg_names[j][0]) + return j; else return -2; }