Message ID | 20240612031148.114435-1-hongyu.wang@intel.com |
---|---|
State | New |
Headers | show |
Series | [APX,CCMP] Use ctestcc when comparing to const 0 |
On Wed, Jun 12, 2024 at 5:12 AM Hongyu Wang <hongyu.wang@intel.com> wrote: > > Hi, > > For CTEST, we don't have conditional AND so there's no optimization > opportunity to write a new ctest pattern. Emit ctest when ccmp did > comparison to const 0 to save bytes. > > Bootstrapped & regtested under x86-64-pc-linux-gnu. > > Ok for trunk? > > gcc/ChangeLog: > > * config/i386/i386.md (@ccmp<mode>): Use ctestcc when > operands[3] is const0_rtx. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/apx-ccmp-1.c: Adjust output to scan ctest. > * gcc.target/i386/apx-ccmp-2.c: Adjust some condition to > compare with 0. > --- > gcc/config/i386/i386.md | 6 +++++- > gcc/testsuite/gcc.target/i386/apx-ccmp-1.c | 10 ++++++---- > gcc/testsuite/gcc.target/i386/apx-ccmp-2.c | 4 ++-- > 3 files changed, 13 insertions(+), 7 deletions(-) > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index a64f2ad4f5f..014d48cddd6 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -1522,7 +1522,11 @@ (define_insn "@ccmp<mode>" > [(match_operand:SI 4 "const_0_to_15_operand")] > UNSPEC_APX_DFV)))] > "TARGET_APX_CCMP" > - "ccmp%C1{<imodesuffix>}\t%G4 {%3, %2|%2, %3}" > + { > + if (operands[3] == const0_rtx && !MEM_P (operands[2])) > + return "ctest%C1{<imodesuffix>}\t%G4 %2, %2"; > + return "ccmp%C1{<imodesuffix>}\t%G4 {%3, %2|%2, %3}"; > + } This could be implemented as an alternative using "r,C" constraint as the first constraint for operands[2,3]. Then the register allocator will match the constraints for you. Uros. 
> [(set_attr "type" "icmp") > (set_attr "mode" "<MODE>") > (set_attr "length_immediate" "1") > diff --git a/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c b/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c > index e4e112f07e0..a8b70576760 100644 > --- a/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c > +++ b/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c > @@ -96,9 +96,11 @@ f15 (double a, double b, int c, int d) > > /* { dg-final { scan-assembler-times "ccmpg" 2 } } */ > /* { dg-final { scan-assembler-times "ccmple" 2 } } */ > -/* { dg-final { scan-assembler-times "ccmpne" 4 } } */ > -/* { dg-final { scan-assembler-times "ccmpe" 3 } } */ > +/* { dg-final { scan-assembler-times "ccmpne" 2 } } */ > +/* { dg-final { scan-assembler-times "ccmpe" 1 } } */ > /* { dg-final { scan-assembler-times "ccmpbe" 1 } } */ > +/* { dg-final { scan-assembler-times "ctestne" 2 } } */ > +/* { dg-final { scan-assembler-times "cteste" 2 } } */ > /* { dg-final { scan-assembler-times "ccmpa" 1 } } */ > -/* { dg-final { scan-assembler-times "ccmpbl" 2 } } */ > - > +/* { dg-final { scan-assembler-times "ccmpbl" 1 } } */ > +/* { dg-final { scan-assembler-times "ctestbl" 1 } } */ > diff --git a/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c b/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c > index 0123a686d2c..4a0784394c3 100644 > --- a/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c > +++ b/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c > @@ -12,7 +12,7 @@ int foo_apx(int a, int b, int c, int d) > c += d; > a += b; > sum += a + c; > - if (b != d && sum < c || sum > d) > + if (b > d && sum != 0 || sum > d) > { > b += d; > sum += b; > @@ -32,7 +32,7 @@ int foo_noapx(int a, int b, int c, int d) > c += d; > a += b; > sum += a + c; > - if (b != d && sum < c || sum > d) > + if (b > d && sum != 0 || sum > d) > { > b += d; > sum += b; > -- > 2.31.1 >
On Wed, Jun 12, 2024 at 12:00 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > On Wed, Jun 12, 2024 at 5:12 AM Hongyu Wang <hongyu.wang@intel.com> wrote: > > > > Hi, > > > > For CTEST, we don't have conditional AND so there's no optimization > > opportunity to write a new ctest pattern. Emit ctest when ccmp did > > comparison to const 0 to save bytes. > > > > Bootstrapped & regtested under x86-64-pc-linux-gnu. > > > > Ok for trunk? > > > > gcc/ChangeLog: > > > > * config/i386/i386.md (@ccmp<mode>): Use ctestcc when > > operands[3] is const0_rtx. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/i386/apx-ccmp-1.c: Adjust output to scan ctest. > > * gcc.target/i386/apx-ccmp-2.c: Adjust some condition to > > compare with 0. > > --- > > gcc/config/i386/i386.md | 6 +++++- > > gcc/testsuite/gcc.target/i386/apx-ccmp-1.c | 10 ++++++---- > > gcc/testsuite/gcc.target/i386/apx-ccmp-2.c | 4 ++-- > > 3 files changed, 13 insertions(+), 7 deletions(-) > > > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > > index a64f2ad4f5f..014d48cddd6 100644 > > --- a/gcc/config/i386/i386.md > > +++ b/gcc/config/i386/i386.md > > @@ -1522,7 +1522,11 @@ (define_insn "@ccmp<mode>" > > [(match_operand:SI 4 "const_0_to_15_operand")] > > UNSPEC_APX_DFV)))] > > "TARGET_APX_CCMP" > > - "ccmp%C1{<imodesuffix>}\t%G4 {%3, %2|%2, %3}" > > + { > > + if (operands[3] == const0_rtx && !MEM_P (operands[2])) > > + return "ctest%C1{<imodesuffix>}\t%G4 %2, %2"; > > + return "ccmp%C1{<imodesuffix>}\t%G4 {%3, %2|%2, %3}"; > > + } > > This could be implemented as an alternative using "r,C" constraint as > the first constraint for operands[2,3]. Then the register allocator > will match the constraints for you. Like in the attached (lightly tested) patch. Uros. 
```diff
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a64f2ad4f5f..14d4d8cddca 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1515,14 +1515,17 @@ (define_insn "@ccmp<mode>"
             (match_operator 1 "comparison_operator"
              [(reg:CC FLAGS_REG) (const_int 0)])
           (compare:CC
-            (minus:SWI (match_operand:SWI 2 "nonimmediate_operand" "<r>m,<r>")
-                       (match_operand:SWI 3 "<general_operand>" "<r><i>,<r><m>"))
+            (minus:SWI (match_operand:SWI 2 "nonimmediate_operand" "<r>,<r>m,<r>")
+                       (match_operand:SWI 3 "<general_operand>" "C,<r><i>,<r><m>"))
             (const_int 0))
           (unspec:SI
             [(match_operand:SI 4 "const_0_to_15_operand")]
             UNSPEC_APX_DFV)))]
   "TARGET_APX_CCMP"
-  "ccmp%C1{<imodesuffix>}\t%G4 {%3, %2|%2, %3}"
+  "@
+   ctest%C1{<imodesuffix>}\t%G4 %2, %2
+   ccmp%C1{<imodesuffix>}\t%G4 {%3, %2|%2, %3}
+   ccmp%C1{<imodesuffix>}\t%G4 {%3, %2|%2, %3}"
   [(set_attr "type" "icmp")
    (set_attr "mode" "<MODE>")
    (set_attr "length_immediate" "1")
```
Thanks for the advice, updated patch in attachment. Bootstrapped/regtested on x86-64-pc-linux-gnu. Ok for trunk? Uros Bizjak <ubizjak@gmail.com> 于2024年6月12日周三 18:12写道: > > On Wed, Jun 12, 2024 at 12:00 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > On Wed, Jun 12, 2024 at 5:12 AM Hongyu Wang <hongyu.wang@intel.com> wrote: > > > > > > Hi, > > > > > > For CTEST, we don't have conditional AND so there's no optimization > > > opportunity to write a new ctest pattern. Emit ctest when ccmp did > > > comparison to const 0 to save bytes. > > > > > > Bootstrapped & regtested under x86-64-pc-linux-gnu. > > > > > > Ok for trunk? > > > > > > gcc/ChangeLog: > > > > > > * config/i386/i386.md (@ccmp<mode>): Use ctestcc when > > > operands[3] is const0_rtx. > > > > > > gcc/testsuite/ChangeLog: > > > > > > * gcc.target/i386/apx-ccmp-1.c: Adjust output to scan ctest. > > > * gcc.target/i386/apx-ccmp-2.c: Adjust some condition to > > > compare with 0. > > > --- > > > gcc/config/i386/i386.md | 6 +++++- > > > gcc/testsuite/gcc.target/i386/apx-ccmp-1.c | 10 ++++++---- > > > gcc/testsuite/gcc.target/i386/apx-ccmp-2.c | 4 ++-- > > > 3 files changed, 13 insertions(+), 7 deletions(-) > > > > > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > > > index a64f2ad4f5f..014d48cddd6 100644 > > > --- a/gcc/config/i386/i386.md > > > +++ b/gcc/config/i386/i386.md > > > @@ -1522,7 +1522,11 @@ (define_insn "@ccmp<mode>" > > > [(match_operand:SI 4 "const_0_to_15_operand")] > > > UNSPEC_APX_DFV)))] > > > "TARGET_APX_CCMP" > > > - "ccmp%C1{<imodesuffix>}\t%G4 {%3, %2|%2, %3}" > > > + { > > > + if (operands[3] == const0_rtx && !MEM_P (operands[2])) > > > + return "ctest%C1{<imodesuffix>}\t%G4 %2, %2"; > > > + return "ccmp%C1{<imodesuffix>}\t%G4 {%3, %2|%2, %3}"; > > > + } > > > > This could be implemented as an alternative using "r,C" constraint as > > the first constraint for operands[2,3]. Then the register allocator > > will match the constraints for you. 
> > Like in the attached (lightly tested) patch. > > Uros.
On Thu, Jun 13, 2024 at 3:44 AM Hongyu Wang <wwwhhhyyy333@gmail.com> wrote: > > Thanks for the advice, updated patch in attachment. > > Bootstrapped/regtested on x86-64-pc-linux-gnu. Ok for trunk? > > Uros Bizjak <ubizjak@gmail.com> 于2024年6月12日周三 18:12写道: > > > > On Wed, Jun 12, 2024 at 12:00 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > > > On Wed, Jun 12, 2024 at 5:12 AM Hongyu Wang <hongyu.wang@intel.com> wrote: > > > > > > > > Hi, > > > > > > > > For CTEST, we don't have conditional AND so there's no optimization > > > > opportunity to write a new ctest pattern. Emit ctest when ccmp did > > > > comparison to const 0 to save bytes. > > > > > > > > Bootstrapped & regtested under x86-64-pc-linux-gnu. > > > > > > > > Ok for trunk? > > > > > > > > gcc/ChangeLog: > > > > > > > > * config/i386/i386.md (@ccmp<mode>): Use ctestcc when > > > > operands[3] is const0_rtx. > > > > > > > > gcc/testsuite/ChangeLog: > > > > > > > > * gcc.target/i386/apx-ccmp-1.c: Adjust output to scan ctest. > > > > * gcc.target/i386/apx-ccmp-2.c: Adjust some condition to > > > > compare with 0. LGTM. + (minus:SWI (match_operand:SWI 2 "nonimmediate_operand" "<r>,<r>m,<r>") + (match_operand:SWI 3 "<general_operand>" "C,<r><i>,<r><m>")) Perhaps the constraint can be slightly optimized to avoid repeating (<r>,<r>) pairs. "<r>,<r>m ,<r>" "C ,<r><i>,<m>" Uros. 
> > > > --- > > > > gcc/config/i386/i386.md | 6 +++++- > > > > gcc/testsuite/gcc.target/i386/apx-ccmp-1.c | 10 ++++++---- > > > > gcc/testsuite/gcc.target/i386/apx-ccmp-2.c | 4 ++-- > > > > 3 files changed, 13 insertions(+), 7 deletions(-) > > > > > > > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > > > > index a64f2ad4f5f..014d48cddd6 100644 > > > > --- a/gcc/config/i386/i386.md > > > > +++ b/gcc/config/i386/i386.md > > > > @@ -1522,7 +1522,11 @@ (define_insn "@ccmp<mode>" > > > > [(match_operand:SI 4 "const_0_to_15_operand")] > > > > UNSPEC_APX_DFV)))] > > > > "TARGET_APX_CCMP" > > > > - "ccmp%C1{<imodesuffix>}\t%G4 {%3, %2|%2, %3}" > > > > + { > > > > + if (operands[3] == const0_rtx && !MEM_P (operands[2])) > > > > + return "ctest%C1{<imodesuffix>}\t%G4 %2, %2"; > > > > + return "ccmp%C1{<imodesuffix>}\t%G4 {%3, %2|%2, %3}"; > > > > + } > > > > > > This could be implemented as an alternative using "r,C" constraint as > > > the first constraint for operands[2,3]. Then the register allocator > > > will match the constraints for you. > > > > Like in the attached (lightly tested) patch. > > > > Uros.
> Perhaps the constraint can be slightly optimized to avoid repeating > (<r>,<r>) pairs. > > "<r>,<r>m ,<r>" > "C ,<r><i>,<m>" Yes, will check-in with this change. Thanks! Uros Bizjak <ubizjak@gmail.com> 于2024年6月13日周四 14:06写道: > > On Thu, Jun 13, 2024 at 3:44 AM Hongyu Wang <wwwhhhyyy333@gmail.com> wrote: > > > > Thanks for the advice, updated patch in attachment. > > > > Bootstrapped/regtested on x86-64-pc-linux-gnu. Ok for trunk? > > > > Uros Bizjak <ubizjak@gmail.com> 于2024年6月12日周三 18:12写道: > > > > > > On Wed, Jun 12, 2024 at 12:00 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > > > > > On Wed, Jun 12, 2024 at 5:12 AM Hongyu Wang <hongyu.wang@intel.com> wrote: > > > > > > > > > > Hi, > > > > > > > > > > For CTEST, we don't have conditional AND so there's no optimization > > > > > opportunity to write a new ctest pattern. Emit ctest when ccmp did > > > > > comparison to const 0 to save bytes. > > > > > > > > > > Bootstrapped & regtested under x86-64-pc-linux-gnu. > > > > > > > > > > Ok for trunk? > > > > > > > > > > gcc/ChangeLog: > > > > > > > > > > * config/i386/i386.md (@ccmp<mode>): Use ctestcc when > > > > > operands[3] is const0_rtx. > > > > > > > > > > gcc/testsuite/ChangeLog: > > > > > > > > > > * gcc.target/i386/apx-ccmp-1.c: Adjust output to scan ctest. > > > > > * gcc.target/i386/apx-ccmp-2.c: Adjust some condition to > > > > > compare with 0. > > LGTM. > > + (minus:SWI (match_operand:SWI 2 "nonimmediate_operand" "<r>,<r>m,<r>") > + (match_operand:SWI 3 "<general_operand>" "C,<r><i>,<r><m>")) > > Perhaps the constraint can be slightly optimized to avoid repeating > (<r>,<r>) pairs. > > "<r>,<r>m ,<r>" > "C ,<r><i>,<m>" > > Uros. 
> > > > > > --- > > > > > gcc/config/i386/i386.md | 6 +++++- > > > > > gcc/testsuite/gcc.target/i386/apx-ccmp-1.c | 10 ++++++---- > > > > > gcc/testsuite/gcc.target/i386/apx-ccmp-2.c | 4 ++-- > > > > > 3 files changed, 13 insertions(+), 7 deletions(-) > > > > > > > > > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > > > > > index a64f2ad4f5f..014d48cddd6 100644 > > > > > --- a/gcc/config/i386/i386.md > > > > > +++ b/gcc/config/i386/i386.md > > > > > @@ -1522,7 +1522,11 @@ (define_insn "@ccmp<mode>" > > > > > [(match_operand:SI 4 "const_0_to_15_operand")] > > > > > UNSPEC_APX_DFV)))] > > > > > "TARGET_APX_CCMP" > > > > > - "ccmp%C1{<imodesuffix>}\t%G4 {%3, %2|%2, %3}" > > > > > + { > > > > > + if (operands[3] == const0_rtx && !MEM_P (operands[2])) > > > > > + return "ctest%C1{<imodesuffix>}\t%G4 %2, %2"; > > > > > + return "ccmp%C1{<imodesuffix>}\t%G4 {%3, %2|%2, %3}"; > > > > > + } > > > > > > > > This could be implemented as an alternative using "r,C" constraint as > > > > the first constraint for operands[2,3]. Then the register allocator > > > > will match the constraints for you. > > > > > > Like in the attached (lightly tested) patch. > > > > > > Uros.
```diff
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a64f2ad4f5f..014d48cddd6 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1522,7 +1522,11 @@ (define_insn "@ccmp<mode>"
             [(match_operand:SI 4 "const_0_to_15_operand")]
             UNSPEC_APX_DFV)))]
   "TARGET_APX_CCMP"
-  "ccmp%C1{<imodesuffix>}\t%G4 {%3, %2|%2, %3}"
+  {
+    if (operands[3] == const0_rtx && !MEM_P (operands[2]))
+      return "ctest%C1{<imodesuffix>}\t%G4 %2, %2";
+    return "ccmp%C1{<imodesuffix>}\t%G4 {%3, %2|%2, %3}";
+  }
   [(set_attr "type" "icmp")
    (set_attr "mode" "<MODE>")
    (set_attr "length_immediate" "1")
diff --git a/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c b/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c
index e4e112f07e0..a8b70576760 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c
@@ -96,9 +96,11 @@ f15 (double a, double b, int c, int d)
 
 /* { dg-final { scan-assembler-times "ccmpg" 2 } } */
 /* { dg-final { scan-assembler-times "ccmple" 2 } } */
-/* { dg-final { scan-assembler-times "ccmpne" 4 } } */
-/* { dg-final { scan-assembler-times "ccmpe" 3 } } */
+/* { dg-final { scan-assembler-times "ccmpne" 2 } } */
+/* { dg-final { scan-assembler-times "ccmpe" 1 } } */
 /* { dg-final { scan-assembler-times "ccmpbe" 1 } } */
+/* { dg-final { scan-assembler-times "ctestne" 2 } } */
+/* { dg-final { scan-assembler-times "cteste" 2 } } */
 /* { dg-final { scan-assembler-times "ccmpa" 1 } } */
-/* { dg-final { scan-assembler-times "ccmpbl" 2 } } */
-
+/* { dg-final { scan-assembler-times "ccmpbl" 1 } } */
+/* { dg-final { scan-assembler-times "ctestbl" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c b/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c
index 0123a686d2c..4a0784394c3 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c
@@ -12,7 +12,7 @@ int foo_apx(int a, int b, int c, int d)
   c += d;
   a += b;
   sum += a + c;
-  if (b != d && sum < c || sum > d)
+  if (b > d && sum != 0 || sum > d)
     {
       b += d;
       sum += b;
@@ -32,7 +32,7 @@ int foo_noapx(int a, int b, int c, int d)
   c += d;
   a += b;
   sum += a + c;
-  if (b != d && sum < c || sum > d)
+  if (b > d && sum != 0 || sum > d)
     {
       b += d;
       sum += b;
```