Message ID | 20200427195309.GC24986@ibm-tinman.the-meissners.org |
---|---|
State | New |
Headers | show |
Series | PowerPC -mcpu=future Patch 3 of 7, Add test for generating prefixed load/store | expand |
On Mon, 2020-04-27 at 15:53 -0400, Michael Meissner via Gcc-patches wrote: > This patch adds a test that verifies that the compiler generates a prefixed > load/store instruction where the compiler cannot generate the instruction > directly because the offset is not a valid DS or DQ offset. A DS offset must > have the bottom 2 bits clear. A DQ offset must have the bottom 4 bits clear. > Due to the way PowerPC instructions are encoded, some instructions use the DS > format and some use the DQ format. > > This is patch #3 of 7. The tests in this patch run on a little endian power8 > system running Linux. > > 2020-04-27 Michael Meissner <meissner@linux.ibm.com> > > * gcc.target/powerpc/prefix-ds-dq.c: New test to verify that we > generate the prefix load/store instructions for traditional > instructions with an offset that doesn't match DS/DQ > requirements. Can probably safely be shortened to end at "... for traditional instructions." if not even just "New test". > > --- /tmp/dtFbYL_prefix-ds-dq.c 2020-04-27 13:54:38.350850944 -0400 > +++ gcc/testsuite/gcc.target/powerpc/prefix-ds-dq.c 2020-04-27 13:54:15.301160847 -0400 > @@ -0,0 +1,156 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_prefixed_addr } */ > +/* { dg-options "-O2 -mdejagnu-cpu=future" } */ > + > +/* Tests whether we generate a prefixed load/store operation for addresses that > + don't meet DS/DQ offset constraints. */ > + > +unsigned long > +load_uc_offset1 (unsigned char *p) > +{ > + return p[1]; /* should generate LBZ. */ > +} > + > +long > +load_sc_offset1 (signed char *p) > +{ > + return p[1]; /* should generate LBZ + EXTSB. */ > +} OCD kicks in and says the above two (and a couple below) have an extra tab. nbd. :-) > + > +unsigned long > +load_us_offset1 (unsigned char *p) > +{ > + return *(unsigned short *)(p + 1); /* should generate LHZ. */ > +} > + > +long > +load_ss_offset1 (unsigned char *p) > +{ > + return *(short *)(p + 1); /* should generate LHA. */ > +} > + > +unsigned long > +load_ui_offset1 (unsigned char *p) > +{ > + return *(unsigned int *)(p + 1); /* should generate LWZ. */ > +} > + > +long > +load_si_offset1 (unsigned char *p) > +{ > + return *(int *)(p + 1); /* should generate PLWA. */ > +} > + > +unsigned long > +load_ul_offset1 (unsigned char *p) > +{ > + return *(unsigned long *)(p + 1); /* should generate PLD. */ > +} > + > +long > +load_sl_offset1 (unsigned char *p) > +{ > + return *(long *)(p + 1); /* should generate PLD. */ > +} > + > +float > +load_float_offset1 (unsigned char *p) > +{ > + return *(float *)(p + 1); /* should generate LFS. */ > +} > + > +double > +load_double_offset1 (unsigned char *p) > +{ > + return *(double *)(p + 1); /* should generate LFD. */ > +} > + > +__float128 > +load_float128_offset1 (unsigned char *p) > +{ > + return *(__float128 *)(p + 1); /* should generate PLXV. */ > +} > + > +void > +store_uc_offset1 (unsigned char uc, unsigned char *p) > +{ > + p[1] = uc; /* should generate STB. */ > +} > + > +void > +store_sc_offset1 (signed char sc, signed char *p) > +{ > + p[1] = sc; /* should generate STB. */ > +} > + > +void > +store_us_offset1 (unsigned short us, unsigned char *p) > +{ > + *(unsigned short *)(p + 1) = us; /* should generate STH. */ > +} > + > +void > +store_ss_offset1 (signed short ss, unsigned char *p) > +{ > + *(signed short *)(p + 1) = ss; /* should generate STH. */ > +} > + > +void > +store_ui_offset1 (unsigned int ui, unsigned char *p) > +{ > + *(unsigned int *)(p + 1) = ui; /* should generate STW. */ > +} > + > +void > +store_si_offset1 (signed int si, unsigned char *p) > +{ > + *(signed int *)(p + 1) = si; /* should generate STW. */ > +} > + > +void > +store_ul_offset1 (unsigned long ul, unsigned char *p) > +{ > + *(unsigned long *)(p + 1) = ul; /* should generate PSTD. */ > +} > + > +void > +store_sl_offset1 (signed long sl, unsigned char *p) > +{ > + *(signed long *)(p + 1) = sl; /* should generate PSTD. */ > +} > + > +void > +store_float_offset1 (float f, unsigned char *p) > +{ > + *(float *)(p + 1) = f; /* should generate STF. */ Comment should be STFD ? > +} > + > +void > +store_double_offset1 (double d, unsigned char *p) > +{ > + *(double *)(p + 1) = d; /* should generate STD. */ Comment should be STFS ? > +} > + > +void > +store_float128_offset1 (__float128 f128, unsigned char *p) > +{ > + *(__float128 *)(p + 1) = f128; /* should generate PSTXV. */ > +} > + > +/* { dg-final { scan-assembler-times {\mextsb\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mlbz\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mlfd\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mlfs\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mlha\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mlhz\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mlwz\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mpld\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mplwa\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mplxv\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mpstd\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mpstxv\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mstb\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mstfd\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mstfs\M} 1 } } */ I wasn't able to matche these two up with the comments above. (see above). beyond those nits, lgtm. thanks, -Will > +/* { dg-final { scan-assembler-times {\msth\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mstw\M} 2 } } */ >
Hi! On Mon, Apr 27, 2020 at 05:01:34PM -0500, will schmidt wrote: > On Mon, 2020-04-27 at 15:53 -0400, Michael Meissner via Gcc-patches wrote: > > This patch adds a test that verifies that the compiler generates a prefixed > > load/store instruction where the compiler cannot generate the instruction > > directly because the offset is not a valid DS or DQ offset. A DS offset must > > have the bottom 2 bits clear. A DQ offset must have the bottom 4 bits clear. > > Due to the way PowerPC instructions are encoded, some instructions use the DS > > format and some use the DQ format. > > > > This is patch #3 of 7. The tests in this patch run on a little endian power8 > > system running Linux. > > > > 2020-04-27 Michael Meissner <meissner@linux.ibm.com> > > > > * gcc.target/powerpc/prefix-ds-dq.c: New test to verify that we > > generate the prefix load/store instructions for traditional > > instructions with an offset that doesn't match DS/DQ > > requirements. > > Can probably safely be shortened to end at "... for traditional > instructions." if not even just "New test". Yes, just "New." or "New test." please. An explanation what the test tests belongs in the test itself (it already is there in this case). > > +/* { dg-require-effective-target powerpc_prefixed_addr } */ > > +/* { dg-options "-O2 -mdejagnu-cpu=future" } */ Same as for all other tests... What do the effective targets mean exactly, do they require some -mcpu=, etc. > > +unsigned long > > +load_us_offset1 (unsigned char *p) > > +{ > > + return *(unsigned short *)(p + 1); /* should generate LHZ. */ > > +} This violates aliasing rules (without -fno-strict-aliasing), unless p points to some "short" object -- in which case it has alignment 2 already! So this testcase needs -fno-strict-aliasing, but also, you need to make sure GCC does not assume alignment 2 (after the cast) already. There are attributes for this. > > +void > > +store_ul_offset1 (unsigned long ul, unsigned char *p) > > +{ > > + *(unsigned long *)(p + 1) = ul; /* should generate PSTD. */ > > +} > > + > > +void > > +store_sl_offset1 (signed long sl, unsigned char *p) > > +{ > > + *(signed long *)(p + 1) = sl; /* should generate PSTD. */ > > +} "long long". > > +void > > +store_float_offset1 (float f, unsigned char *p) > > +{ > > + *(float *)(p + 1) = f; /* should generate STF. */ > > Comment should be STFD ? > > > +} > > + > > +void > > +store_double_offset1 (double d, unsigned char *p) > > +{ > > + *(double *)(p + 1) = d; /* should generate STD. */ > > Comment should be STFS ? The other way around :-) stfs is for single precision float ("float", in C), while stfd is for double precision float ("double", in C). > > +/* { dg-final { scan-assembler-times {\mextsb\M} 1 } } */ Maybe also test there are no extsh and extsw generated? Segher
On Fri, 2020-05-01 at 10:48 -0500, Segher Boessenkool wrote: > Hi! > > On Mon, Apr 27, 2020 at 05:01:34PM -0500, will schmidt wrote: > > On Mon, 2020-04-27 at 15:53 -0400, Michael Meissner via Gcc-patches > > wrote: > > > This patch adds a test that verifies that the compiler generates > > > a prefixed > > > load/store instruction where the compiler cannot generate the > > > instruction > > > directly because the offset is not a valid DS or DQ offset. A DS > > > offset must > > > have the bottom 2 bits clear. A DQ offset must have the bottom 4 > > > bits clear. > > > Due to the way PowerPC instructions are encoded, some > > > instructions use the DS > > > format and some use the DQ format. > > > > > > This is patch #3 of 7. The tests in this patch run on a little > > > endian power8 > > > system running Linux. > > > > > > <snip> > "long long". > > > > +void > > > +store_float_offset1 (float f, unsigned char *p) > > > +{ > > > + *(float *)(p + 1) = f; /* should generate STF. */ > > > > Comment should be STFD ? > > > > > +} > > > + > > > +void > > > +store_double_offset1 (double d, unsigned char *p) > > > +{ > > > + *(double *)(p + 1) = d; /* should generate > > > STD. */ > > > > Comment should be STFS ? > > The other way around :-) stfs is for single precision float > ("float", > in C), while stfd is for double precision float ("double", in C). I came up with my comment based on what was being tested for further below. If the comment is correct the test below is wrong. (unless i mis-counted something .. always possible but worth double- checking). :-) Thanks, -Will > > > > +/* { dg-final { scan-assembler-times {\mextsb\M} 1 } } */ > > Maybe also test there are no extsh and extsw generated? > > > Segher
On Fri, May 01, 2020 at 03:54:26PM -0500, will schmidt wrote: > > The other way around :-) stfs is for single precision float > > ("float", > > in C), while stfd is for double precision float ("double", in C). > > I came up with my comment based on what was being tested for further > below. If the comment is correct the test below is wrong. > (unless i mis-counted something .. always possible but worth double- > checking). :-) Ah. The order of the scan-assembler statements does not matter; for each, dejagnu looks at the whole file. (And both instructions are generated only once). Segher
On Fri, 2020-05-01 at 17:02 -0500, Segher Boessenkool wrote: > On Fri, May 01, 2020 at 03:54:26PM -0500, will schmidt wrote: > > > The other way around :-) stfs is for single precision float > > > ("float", > > > in C), while stfd is for double precision float ("double", in C). > > > > I came up with my comment based on what was being tested for > > further > > below. If the comment is correct the test below is wrong. > > (unless i mis-counted something .. always possible but worth > > double- > > checking). :-) > > Ah. The order of the scan-assembler statements does not matter; for > each, dejagnu looks at the whole file. (And both instructions are > generated only once). > Ok, yeah, right, I misread/misunderstood your comment.. :-) Gist of it was that there is a mismatch between asm and comments. + *(float *)(p + 1) = f; /* should generate STFS. */ + *(double *)(p + 1) = d; /* should generate STFD. */ +/* { dg-final { scan-assembler-times {\mstfd\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mstfs\M} 1 } } */ thanks, -Will > > Segher
On Fri, 1 May 2020, Segher Boessenkool wrote: > On Mon, Apr 27, 2020 at 05:01:34PM -0500, will schmidt wrote: > > On Mon, 2020-04-27 at 15:53 -0400, Michael Meissner via Gcc-patches wrote: > > > +unsigned long > > > +load_us_offset1 (unsigned char *p) > > > +{ > > > + return *(unsigned short *)(p + 1); /* should generate LHZ. */ > > > +} > > This violates aliasing rules (without -fno-strict-aliasing), unless p > points to some "short" object -- in which case it has alignment 2 > already! Ackchyually... that'd be 'unless p+1 points to some "short" object'. You're allowed to do weird things passing char pointers around (at least), as long as you point to the right-type object (the one used when it was assigned) when you dereference it. Though, I don't know if the same goes if p was something other than a "char *" or "unsigned char *". > So this testcase needs -fno-strict-aliasing, but also, you need to make > sure GCC does not assume alignment 2 (after the cast) already. There > are attributes for this. FWIW, if it needs -fno-strict-aliasing it's for some other reason than the above (we don't see the assigner in the test). brgds, H-P
Hi! On Tue, May 05, 2020 at 08:41:35PM -0400, Hans-Peter Nilsson wrote: > On Fri, 1 May 2020, Segher Boessenkool wrote: > > On Mon, Apr 27, 2020 at 05:01:34PM -0500, will schmidt wrote: > > > On Mon, 2020-04-27 at 15:53 -0400, Michael Meissner via Gcc-patches wrote: > > > > +unsigned long > > > > +load_us_offset1 (unsigned char *p) > > > > +{ > > > > + return *(unsigned short *)(p + 1); /* should generate LHZ. */ > > > > +} > > > > This violates aliasing rules (without -fno-strict-aliasing), unless p > > points to some "short" object -- in which case it has alignment 2 > > already! > > Ackchyually... that'd be 'unless p+1 points to some "short" object'. Erm yes. > You're allowed to do weird things passing char pointers around > (at least), as long as you point to the right-type object (the > one used when it was assigned) when you dereference it. And when you cast it to the "bigger" pointer type, already, in general. Which isn't a problem for any of the powerpc targets at least, not sure there is any GCC target where it matters currently? > Though, I don't know if the same goes if p was something other > than a "char *" or "unsigned char *". Sure, it is true always: you can do whatever you want with the address, cast it (validly) to other pointer types, to integer types, slice it, dice it, whatever you want; but when you dereference it, *that* is where the "aliasing rules" apply. > > So this testcase needs -fno-strict-aliasing, but also, you need to make > > sure GCC does not assume alignment 2 (after the cast) already. There > > are attributes for this. > > FWIW, if it needs -fno-strict-aliasing it's for some other > reason than the above (we don't see the assigner in the test). Not to be valid code, no. But for the testcase to test what it wants to test, it would help. The point is that you can do a misaligned "lhz" instruction just fine; you do not need a prefixed form for it, as with say "ld", where the offset addressing form always does a multiple of four ("DS-form"), while "pld" (its prefixed version) can take any integer offset ("D-form"). If GCC already would assume the memory access is naturally aligned the test doesn't test anything. But together with it choosing the prefixed forms for ld and lwa etc., it's probably good enough anyway. Segher
--- /tmp/dtFbYL_prefix-ds-dq.c 2020-04-27 13:54:38.350850944 -0400 +++ gcc/testsuite/gcc.target/powerpc/prefix-ds-dq.c 2020-04-27 13:54:15.301160847 -0400 @@ -0,0 +1,156 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_prefixed_addr } */ +/* { dg-options "-O2 -mdejagnu-cpu=future" } */ + +/* Tests whether we generate a prefixed load/store operation for addresses that + don't meet DS/DQ offset constraints. */ + +unsigned long +load_uc_offset1 (unsigned char *p) +{ + return p[1]; /* should generate LBZ. */ +} + +long +load_sc_offset1 (signed char *p) +{ + return p[1]; /* should generate LBZ + EXTSB. */ +} + +unsigned long +load_us_offset1 (unsigned char *p) +{ + return *(unsigned short *)(p + 1); /* should generate LHZ. */ +} + +long +load_ss_offset1 (unsigned char *p) +{ + return *(short *)(p + 1); /* should generate LHA. */ +} + +unsigned long +load_ui_offset1 (unsigned char *p) +{ + return *(unsigned int *)(p + 1); /* should generate LWZ. */ +} + +long +load_si_offset1 (unsigned char *p) +{ + return *(int *)(p + 1); /* should generate PLWA. */ +} + +unsigned long +load_ul_offset1 (unsigned char *p) +{ + return *(unsigned long *)(p + 1); /* should generate PLD. */ +} + +long +load_sl_offset1 (unsigned char *p) +{ + return *(long *)(p + 1); /* should generate PLD. */ +} + +float +load_float_offset1 (unsigned char *p) +{ + return *(float *)(p + 1); /* should generate LFS. */ +} + +double +load_double_offset1 (unsigned char *p) +{ + return *(double *)(p + 1); /* should generate LFD. */ +} + +__float128 +load_float128_offset1 (unsigned char *p) +{ + return *(__float128 *)(p + 1); /* should generate PLXV. */ +} + +void +store_uc_offset1 (unsigned char uc, unsigned char *p) +{ + p[1] = uc; /* should generate STB. */ +} + +void +store_sc_offset1 (signed char sc, signed char *p) +{ + p[1] = sc; /* should generate STB. */ +} + +void +store_us_offset1 (unsigned short us, unsigned char *p) +{ + *(unsigned short *)(p + 1) = us; /* should generate STH. */ +} + +void +store_ss_offset1 (signed short ss, unsigned char *p) +{ + *(signed short *)(p + 1) = ss; /* should generate STH. */ +} + +void +store_ui_offset1 (unsigned int ui, unsigned char *p) +{ + *(unsigned int *)(p + 1) = ui; /* should generate STW. */ +} + +void +store_si_offset1 (signed int si, unsigned char *p) +{ + *(signed int *)(p + 1) = si; /* should generate STW. */ +} + +void +store_ul_offset1 (unsigned long ul, unsigned char *p) +{ + *(unsigned long *)(p + 1) = ul; /* should generate PSTD. */ +} + +void +store_sl_offset1 (signed long sl, unsigned char *p) +{ + *(signed long *)(p + 1) = sl; /* should generate PSTD. */ +} + +void +store_float_offset1 (float f, unsigned char *p) +{ + *(float *)(p + 1) = f; /* should generate STF. */ +} + +void +store_double_offset1 (double d, unsigned char *p) +{ + *(double *)(p + 1) = d; /* should generate STD. */ +} + +void +store_float128_offset1 (__float128 f128, unsigned char *p) +{ + *(__float128 *)(p + 1) = f128; /* should generate PSTXV. */ +} + +/* { dg-final { scan-assembler-times {\mextsb\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mlbz\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mlfd\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mlfs\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mlha\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mlhz\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mlwz\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mpld\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mplwa\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mplxv\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mpstd\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mpstxv\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mstb\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mstfd\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mstfs\M} 1 } } */ +/* { dg-final { scan-assembler-times {\msth\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mstw\M} 2 } } */