Message ID | 20230616083412.1877704-1-guojiufu@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | [V3,1/4] rs6000: build constant via li;rotldi | expand |
Hi! On Fri, Jun 16, 2023 at 04:34:12PM +0800, Jiufu Guo wrote: > +/* Check if value C can be built by 2 instructions: one is 'li', another is > + rotldi. > + > + If so, *SHIFT is set to the shift operand of rotldi(rldicl), and *MASK > + is set to -1, and return true. Return false otherwise. */ Don't say "is set to -1", the point of having this is so you say "is set to the "li" value". Just like you describe what SHIFT is for. > +static bool > +can_be_built_by_li_and_rotldi (HOST_WIDE_INT c, int *shift, > + HOST_WIDE_INT *mask) > +{ > + int n; Put this later, like: > + /* Check if C can be rotated to a positive or negative value > + which 'li' instruction is able to load. */ int n; > + if (can_be_rotated_to_lowbits (c, 15, &n) > + || can_be_rotated_to_lowbits (~c, 15, &n)) > + { > + *mask = HOST_WIDE_INT_M1; > + *shift = HOST_BITS_PER_WIDE_INT - n; > + return true; > + } It is tricky to see ~c will always work, since what is really done is -c instead. Can you just use that here? > @@ -10266,15 +10291,14 @@ static void > rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) > { > rtx temp; > + int shift; > + HOST_WIDE_INT mask; > HOST_WIDE_INT ud1, ud2, ud3, ud4; > > ud1 = c & 0xffff; > - c = c >> 16; > - ud2 = c & 0xffff; > - c = c >> 16; > - ud3 = c & 0xffff; > - c = c >> 16; > - ud4 = c & 0xffff; > + ud2 = (c >> 16) & 0xffff; > + ud3 = (c >> 32) & 0xffff; > + ud4 = (c >> 48) & 0xffff; > > if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) > || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000))) > @@ -10305,6 +10329,17 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) > emit_move_insn (dest, gen_rtx_XOR (DImode, temp, > GEN_INT ((ud2 ^ 0xffff) << 16))); > } > + else if (can_be_built_by_li_and_rotldi (c, &shift, &mask)) > + { > + temp = !can_create_pseudo_p () ? 
dest : gen_reg_rtx (DImode); > + unsigned HOST_WIDE_INT imm = (c | ~mask); > + imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift)); > + > + emit_move_insn (temp, GEN_INT (imm)); > + if (shift != 0) > + temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift)); > + emit_move_insn (dest, temp); > + } If you would rewrite so it isn't such a run-on thing with "else if", instead using early outs, or even some factoring, you could declare the variable used only in a tiny scope in that tiny scope instead. > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/const-build.c > @@ -0,0 +1,54 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -save-temps" } */ > +/* { dg-require-effective-target has_arch_ppc64 } */ Please put a tiny comment here saying what this test is *for*? The file name is a bit of hint already, but you can indicate much more in one or two lines :-) With those adjustments, okay for trunk. Thanks! (If -c doesn't work, it needs more explanation). Segher
Hi! Segher Boessenkool <segher@kernel.crashing.org> writes: > Hi! > > On Fri, Jun 16, 2023 at 04:34:12PM +0800, Jiufu Guo wrote: >> +/* Check if value C can be built by 2 instructions: one is 'li', another is >> + rotldi. >> + >> + If so, *SHIFT is set to the shift operand of rotldi(rldicl), and *MASK >> + is set to -1, and return true. Return false otherwise. */ > > Don't say "is set to -1", the point of having this is so you say "is set > to the "li" value". Just like you describe what SHIFT is for. Yes, thanks! > >> +static bool >> +can_be_built_by_li_and_rotldi (HOST_WIDE_INT c, int *shift, >> + HOST_WIDE_INT *mask) >> +{ >> + int n; > > Put this later, like: Thanks! > >> + /* Check if C can be rotated to a positive or negative value >> + which 'li' instruction is able to load. */ > int n; >> + if (can_be_rotated_to_lowbits (c, 15, &n) >> + || can_be_rotated_to_lowbits (~c, 15, &n)) >> + { >> + *mask = HOST_WIDE_INT_M1; >> + *shift = HOST_BITS_PER_WIDE_INT - n; >> + return true; >> + } > > It is tricky to see ~c will always work, since what is really done is -c > instead. Can you just use that here? Some explanation: A negative value of 'li' is: 0b11..11xxx, where there are 49 leading '1's, and the other 15 trailing bits can be 0 or 1. With the '~' operation, there are 49 '0's. After the value is rotated, there are still 49 '1's. (xxx may also be at head/tail.) For the rotated value, with the '~' operation, there are still 49 '0's. So, if a value has 49 successive '1's (which may cross head/tail), it can be rotated into the low 15 bits after the '~' operation. Using the '-' operation would not be enough, since '-x = ~x + 1' at the bit level. Take the case 'li_rotldi_3' below: 0xffff8531ffffffffLL (0x8531 rotated left by 32 bits). The '~c' is 0x7ace00000000, which can be rotated from 0x7ace (~0x8531). But '-c' is 0x7ace00000001, and this value cannot be rotated into the low 15 bits. 
> >> @@ -10266,15 +10291,14 @@ static void >> rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) >> { >> rtx temp; >> + int shift; >> + HOST_WIDE_INT mask; >> HOST_WIDE_INT ud1, ud2, ud3, ud4; >> >> ud1 = c & 0xffff; >> - c = c >> 16; >> - ud2 = c & 0xffff; >> - c = c >> 16; >> - ud3 = c & 0xffff; >> - c = c >> 16; >> - ud4 = c & 0xffff; >> + ud2 = (c >> 16) & 0xffff; >> + ud3 = (c >> 32) & 0xffff; >> + ud4 = (c >> 48) & 0xffff; >> >> if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) >> || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000))) >> @@ -10305,6 +10329,17 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) >> emit_move_insn (dest, gen_rtx_XOR (DImode, temp, >> GEN_INT ((ud2 ^ 0xffff) << 16))); >> } >> + else if (can_be_built_by_li_and_rotldi (c, &shift, &mask)) >> + { >> + temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); >> + unsigned HOST_WIDE_INT imm = (c | ~mask); >> + imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift)); >> + >> + emit_move_insn (temp, GEN_INT (imm)); >> + if (shift != 0) >> + temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift)); >> + emit_move_insn (dest, temp); >> + } > > If you would rewrite so it isn't such a run-on thing with "else if", > instead using early outs, or even some factoring, you could declare the > variable used only in a tiny scope in that tiny scope instead. Yes! Early returning is better for a lot of cases. I would like to have a refactor patch. > >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/const-build.c >> @@ -0,0 +1,54 @@ >> +/* { dg-do run } */ >> +/* { dg-options "-O2 -save-temps" } */ >> +/* { dg-require-effective-target has_arch_ppc64 } */ > > Please put a tiny comment here saying what this test is *for*? The file > name is a bit of hint already, but you can indicate much more in one or > two lines :-) Oh, yes, thanks for point out this! > > With those adjustments, okay for trunk. Thanks! 
> > (If -c doesn't work, it needs more explanation). Sure, some words as above. BR, Jeff (Jiufu Guo) > > > Segher
Hi, Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > Hi! > > Segher Boessenkool <segher@kernel.crashing.org> writes: > >> Hi! >> >> On Fri, Jun 16, 2023 at 04:34:12PM +0800, Jiufu Guo wrote: >>> +/* Check if value C can be built by 2 instructions: one is 'li', another is >>> + rotldi. >>> + >>> + If so, *SHIFT is set to the shift operand of rotldi(rldicl), and *MASK >>> + is set to -1, and return true. Return false otherwise. */ >> >> Don't say "is set to -1", the point of having this is so you say "is set >> to the "li" value". Just like you describe what SHIFT is for. > Yes, thanks! >> >>> +static bool >>> +can_be_built_by_li_and_rotldi (HOST_WIDE_INT c, int *shift, >>> + HOST_WIDE_INT *mask) >>> +{ >>> + int n; >> >> Put shis later, like: > Thanks! >> >>> + /* Check if C can be rotated to a positive or negative value >>> + which 'li' instruction is able to load. */ >> int n; >>> + if (can_be_rotated_to_lowbits (c, 15, &n) >>> + || can_be_rotated_to_lowbits (~c, 15, &n)) >>> + { >>> + *mask = HOST_WIDE_INT_M1; >>> + *shift = HOST_BITS_PER_WIDE_INT - n; >>> + return true; >>> + } >> >> It is tricky to see ~c will always work, since what is really done is -c >> instead. Can you just use that here? > > Some explanation: > A negative value of 'li' is: > 0b11..11xxx there are 49 leading '1's, and the other 15 tailing bits can > be 0 or 1. With the '~' operation, there are 49 '0's. > After the value is rotated, there are still 49 '1's. (xxx may also be > at head/tail.) > For the rotated value, with the '~' operation, there are still 49 '0's. > > So, for a value, if there are 49 successive '1's (may cross head/tail). > It should be able to rotate to low 15 bits after the '~' operation. > > It would not be enough if using the '-' operation, since '-x=~x+1' in > the bit aspect. As the below case 'li_rotldi_3': 0xffff8531ffffffffLL > (rotate left 0x8531 32bit). > The '~c' is 0x7ace00000000, this can be rotated from 0x7ace. (~0x8531). 
> But '-c' is 0x7ace00000001. this value is not good. > >> >>> @@ -10266,15 +10291,14 @@ static void >>> rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) >>> { >>> rtx temp; >>> + int shift; >>> + HOST_WIDE_INT mask; >>> HOST_WIDE_INT ud1, ud2, ud3, ud4; >>> >>> ud1 = c & 0xffff; >>> - c = c >> 16; >>> - ud2 = c & 0xffff; >>> - c = c >> 16; >>> - ud3 = c & 0xffff; >>> - c = c >> 16; >>> - ud4 = c & 0xffff; >>> + ud2 = (c >> 16) & 0xffff; >>> + ud3 = (c >> 32) & 0xffff; >>> + ud4 = (c >> 48) & 0xffff; >>> >>> if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) >>> || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000))) >>> @@ -10305,6 +10329,17 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) >>> emit_move_insn (dest, gen_rtx_XOR (DImode, temp, >>> GEN_INT ((ud2 ^ 0xffff) << 16))); >>> } >>> + else if (can_be_built_by_li_and_rotldi (c, &shift, &mask)) >>> + { >>> + temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); >>> + unsigned HOST_WIDE_INT imm = (c | ~mask); >>> + imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift)); >>> + >>> + emit_move_insn (temp, GEN_INT (imm)); >>> + if (shift != 0) >>> + temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift)); >>> + emit_move_insn (dest, temp); >>> + } >> >> If you would rewrite so it isn't such a run-on thing with "else if", >> instead using early outs, or even some factoring, you could declare the >> variable used only in a tiny scope in that tiny scope instead. > > Yes! Early returning is better for a lot of cases. I would like > to have a refactor patch. > >> >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/powerpc/const-build.c >>> @@ -0,0 +1,54 @@ >>> +/* { dg-do run } */ >>> +/* { dg-options "-O2 -save-temps" } */ >>> +/* { dg-require-effective-target has_arch_ppc64 } */ >> >> Please put a tiny comment here saying what this test is *for*? 
The file >> name is a bit of hint already, but you can indicate much more in one or >> two lines :-) > > Oh, yes, thanks for point out this! > >> >> With those adjustments, okay for trunk. Thanks! >> >> (If -c doesn't work, it needs more explanation). The patch is updated, and attached below. If ok, I would like to commit the patch accordingly. BR, Jeff (Jiufu Guo) If a constant is possible to be rotated to/from a positive or negative value from "li", then "li;rotldi" can be used to build the constant. gcc/ChangeLog: * config/rs6000/rs6000.cc (can_be_built_by_li_and_rotldi): New function. (rs6000_emit_set_long_const): Call can_be_built_by_li_and_rotldi. gcc/testsuite/ChangeLog: * gcc.target/powerpc/const-build.c: New test. --- gcc/config/rs6000/rs6000.cc | 47 +++++++++++++-- .../gcc.target/powerpc/const-build.c | 57 +++++++++++++++++++ 2 files changed, 98 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/const-build.c diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 42f49e4a56b..acc332acc05 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -10258,6 +10258,31 @@ rs6000_emit_set_const (rtx dest, rtx source) return true; } +/* Check if value C can be built by 2 instructions: one is 'li', another is + rotldi. + + If so, *SHIFT is set to the shift operand of rotldi(rldicl), and *MASK + is set to the mask operand of rotldi(rldicl), and return true. + Return false otherwise. */ + +static bool +can_be_built_by_li_and_rotldi (HOST_WIDE_INT c, int *shift, + HOST_WIDE_INT *mask) +{ + /* If C or ~C contains at least 49 successive zeros, then C can be rotated + to/from a positive or negative value that 'li' is able to load. 
*/ + int n; + if (can_be_rotated_to_lowbits (c, 15, &n) + || can_be_rotated_to_lowbits (~c, 15, &n)) + { + *mask = HOST_WIDE_INT_M1; + *shift = HOST_BITS_PER_WIDE_INT - n; + return true; + } + + return false; +} + /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode. Output insns to set DEST equal to the constant C as a series of lis, ori and shl instructions. */ @@ -10266,15 +10291,14 @@ static void rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) { rtx temp; + int shift; + HOST_WIDE_INT mask; HOST_WIDE_INT ud1, ud2, ud3, ud4; ud1 = c & 0xffff; - c = c >> 16; - ud2 = c & 0xffff; - c = c >> 16; - ud3 = c & 0xffff; - c = c >> 16; - ud4 = c & 0xffff; + ud2 = (c >> 16) & 0xffff; + ud3 = (c >> 32) & 0xffff; + ud4 = (c >> 48) & 0xffff; if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000))) @@ -10305,6 +10329,17 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) emit_move_insn (dest, gen_rtx_XOR (DImode, temp, GEN_INT ((ud2 ^ 0xffff) << 16))); } + else if (can_be_built_by_li_and_rotldi (c, &shift, &mask)) + { + temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); + unsigned HOST_WIDE_INT imm = (c | ~mask); + imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift)); + + emit_move_insn (temp, GEN_INT (imm)); + if (shift != 0) + temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift)); + emit_move_insn (dest, temp); + } else if (ud3 == 0 && ud4 == 0) { temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); diff --git a/gcc/testsuite/gcc.target/powerpc/const-build.c b/gcc/testsuite/gcc.target/powerpc/const-build.c new file mode 100644 index 00000000000..69b37e2bb53 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/const-build.c @@ -0,0 +1,57 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -save-temps" } */ +/* { dg-require-effective-target has_arch_ppc64 } */ + +/* Verify that two instructions are successfully used to build constants. 
+ One insn is li or lis, another is rotate: rldicl, rldicr or rldic. */ + +#define NOIPA __attribute__ ((noipa)) + +struct fun +{ + long long (*f) (void); + long long val; +}; + +long long NOIPA +li_rotldi_1 (void) +{ + return 0x7531000000000LL; +} + +long long NOIPA +li_rotldi_2 (void) +{ + return 0x2100000000000064LL; +} + +long long NOIPA +li_rotldi_3 (void) +{ + return 0xffff8531ffffffffLL; +} + +long long NOIPA +li_rotldi_4 (void) +{ + return 0x21ffffffffffff94LL; +} + +struct fun arr[] = { + {li_rotldi_1, 0x7531000000000LL}, + {li_rotldi_2, 0x2100000000000064LL}, + {li_rotldi_3, 0xffff8531ffffffffLL}, + {li_rotldi_4, 0x21ffffffffffff94LL}, +}; + +/* { dg-final { scan-assembler-times {\mrotldi\M} 4 } } */ + +int +main () +{ + for (int i = 0; i < sizeof (arr) / sizeof (arr[0]); i++) + if ((*arr[i].f) () != arr[i].val) + __builtin_abort (); + + return 0; +}
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 42f49e4a56b..13aafd1360a 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -10258,6 +10258,31 @@ rs6000_emit_set_const (rtx dest, rtx source) return true; } +/* Check if value C can be built by 2 instructions: one is 'li', another is + rotldi. + + If so, *SHIFT is set to the shift operand of rotldi(rldicl), and *MASK + is set to -1, and return true. Return false otherwise. */ + +static bool +can_be_built_by_li_and_rotldi (HOST_WIDE_INT c, int *shift, + HOST_WIDE_INT *mask) +{ + int n; + + /* Check if C can be rotated to a positive or negative value + which 'li' instruction is able to load. */ + if (can_be_rotated_to_lowbits (c, 15, &n) + || can_be_rotated_to_lowbits (~c, 15, &n)) + { + *mask = HOST_WIDE_INT_M1; + *shift = HOST_BITS_PER_WIDE_INT - n; + return true; + } + + return false; +} + /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode. Output insns to set DEST equal to the constant C as a series of lis, ori and shl instructions. */ @@ -10266,15 +10291,14 @@ static void rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) { rtx temp; + int shift; + HOST_WIDE_INT mask; HOST_WIDE_INT ud1, ud2, ud3, ud4; ud1 = c & 0xffff; - c = c >> 16; - ud2 = c & 0xffff; - c = c >> 16; - ud3 = c & 0xffff; - c = c >> 16; - ud4 = c & 0xffff; + ud2 = (c >> 16) & 0xffff; + ud3 = (c >> 32) & 0xffff; + ud4 = (c >> 48) & 0xffff; if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000))) @@ -10305,6 +10329,17 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) emit_move_insn (dest, gen_rtx_XOR (DImode, temp, GEN_INT ((ud2 ^ 0xffff) << 16))); } + else if (can_be_built_by_li_and_rotldi (c, &shift, &mask)) + { + temp = !can_create_pseudo_p () ? 
dest : gen_reg_rtx (DImode); + unsigned HOST_WIDE_INT imm = (c | ~mask); + imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift)); + + emit_move_insn (temp, GEN_INT (imm)); + if (shift != 0) + temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift)); + emit_move_insn (dest, temp); + } else if (ud3 == 0 && ud4 == 0) { temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); diff --git a/gcc/testsuite/gcc.target/powerpc/const-build.c b/gcc/testsuite/gcc.target/powerpc/const-build.c new file mode 100644 index 00000000000..70f095f6bf2 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/const-build.c @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -save-temps" } */ +/* { dg-require-effective-target has_arch_ppc64 } */ + +#define NOIPA __attribute__ ((noipa)) + +struct fun +{ + long long (*f) (void); + long long val; +}; + +long long NOIPA +li_rotldi_1 (void) +{ + return 0x7531000000000LL; +} + +long long NOIPA +li_rotldi_2 (void) +{ + return 0x2100000000000064LL; +} + +long long NOIPA +li_rotldi_3 (void) +{ + return 0xffff8531ffffffffLL; +} + +long long NOIPA +li_rotldi_4 (void) +{ + return 0x21ffffffffffff94LL; +} + +struct fun arr[] = { + {li_rotldi_1, 0x7531000000000LL}, + {li_rotldi_2, 0x2100000000000064LL}, + {li_rotldi_3, 0xffff8531ffffffffLL}, + {li_rotldi_4, 0x21ffffffffffff94LL}, +}; + +/* { dg-final { scan-assembler-times {\mrotldi\M} 4 } } */ + +int +main () +{ + for (int i = 0; i < sizeof (arr) / sizeof (arr[0]); i++) + if ((*arr[i].f) () != arr[i].val) + __builtin_abort (); + + return 0; +}