Message ID | 20160115154630.GA15775@msticlxl57.ims.intel.com |
---|---|
State | New |
Headers | show |
On Fri, Jan 15, 2016 at 4:46 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote: > Hi, > > This patch continues resolving the andn regression case in the stv pass > (see https://gcc.gnu.org/ml/gcc-patches/2016-01/msg01017.html). > In this patch a new andn pattern is added, similar to the other bit > DI patterns we have for the stv pass. > > This improves performance of the 462.libquantum benchmark on Haswell > (+2.6% on -O2, +1% on -O3 -flto). > > Unfortunately this patch doesn't enable generation of pandn in case > the target doesn't have BMI. Perhaps a peephole could be used for such targets? > Or we may allow andn and then split it back to and + xor for them. IMO, we want a splitter here. We should optimize the compiler for newer targets. > Bootstrapped and regtested on x86_64-unknown-linux-gnu. OK for trunk? > > Thanks, > Ilya > -- > gcc/ > > 2016-01-15 Ilya Enkovich <enkovich.gnu@gmail.com> > > * config/i386/i386.c (scalar_to_vector_candidate_p): Support > andnot instruction. > (scalar_chain::convert_op): Likewise. > * config/i386/i386.md (*andndi3_doubleword): New. > > gcc/testsuite/ > > 2016-01-15 Ilya Enkovich <enkovich.gnu@gmail.com> > > * gcc.target/i386/pr65105-5.c: Adjust to andn generation. OK for mainline. Thanks, Uros. > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index de41477..a0b0d68 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -2815,7 +2815,11 @@ scalar_to_vector_candidate_p (rtx_insn *insn) > return false; > } > > - if (!REG_P (XEXP (src, 0)) && !MEM_P (XEXP (src, 0))) > + if (!REG_P (XEXP (src, 0)) && !MEM_P (XEXP (src, 0)) > + /* Check for andnot case. 
*/ > + && (GET_CODE (src) != AND > + || GET_CODE (XEXP (src, 0)) != NOT > + || !REG_P (XEXP (XEXP (src, 0), 0)))) > return false; > > if (!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1))) > @@ -3383,7 +3387,12 @@ scalar_chain::convert_op (rtx *op, rtx_insn *insn) > { > *op = copy_rtx_if_shared (*op); > > - if (MEM_P (*op)) > + if (GET_CODE (*op) == NOT) > + { > + convert_op (&XEXP (*op, 0), insn); > + PUT_MODE (*op, V2DImode); > + } > + else if (MEM_P (*op)) > { > rtx tmp = gen_reg_rtx (DImode); > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index 71941d0..f16b42a 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -8645,6 +8645,23 @@ > (clobber (reg:CC FLAGS_REG))])] > "split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);") > > +(define_insn_and_split "*andndi3_doubleword" > + [(set (match_operand:DI 0 "register_operand" "=r,r") > + (and:DI > + (not:DI (match_operand:DI 1 "register_operand" "r,r")) > + (match_operand:DI 2 "nonimmediate_operand" "r,m"))) > + (clobber (reg:CC FLAGS_REG))] > + "TARGET_BMI && !TARGET_64BIT && TARGET_STV && TARGET_SSE" > + "#" > + "&& reload_completed" > + [(parallel [(set (match_dup 0) > + (and:SI (not:SI (match_dup 1)) (match_dup 2))) > + (clobber (reg:CC FLAGS_REG))]) > + (parallel [(set (match_dup 3) > + (and:SI (not:SI (match_dup 4)) (match_dup 5))) > + (clobber (reg:CC FLAGS_REG))])] > + "split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);") > + > (define_insn "*<code>hi_1" > [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,!k") > (any_or:HI > diff --git a/gcc/testsuite/gcc.target/i386/pr65105-5.c b/gcc/testsuite/gcc.target/i386/pr65105-5.c > index 5818c1c..639bbe1 100644 > --- a/gcc/testsuite/gcc.target/i386/pr65105-5.c > +++ b/gcc/testsuite/gcc.target/i386/pr65105-5.c > @@ -1,7 +1,7 @@ > /* PR target/pr65105 */ > /* { dg-do compile { target { ia32 } } } */ > /* { dg-options "-O2 -march=core-avx2" } */ > -/* { dg-final { scan-assembler 
"pand" } } */ > +/* { dg-final { scan-assembler "pandn" } } */ > /* { dg-final { scan-assembler "pxor" } } */ > /* { dg-final { scan-assembler "ptest" } } */ >
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index de41477..a0b0d68 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2815,7 +2815,11 @@ scalar_to_vector_candidate_p (rtx_insn *insn) return false; } - if (!REG_P (XEXP (src, 0)) && !MEM_P (XEXP (src, 0))) + if (!REG_P (XEXP (src, 0)) && !MEM_P (XEXP (src, 0)) + /* Check for andnot case. */ + && (GET_CODE (src) != AND + || GET_CODE (XEXP (src, 0)) != NOT + || !REG_P (XEXP (XEXP (src, 0), 0)))) return false; if (!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1))) @@ -3383,7 +3387,12 @@ scalar_chain::convert_op (rtx *op, rtx_insn *insn) { *op = copy_rtx_if_shared (*op); - if (MEM_P (*op)) + if (GET_CODE (*op) == NOT) + { + convert_op (&XEXP (*op, 0), insn); + PUT_MODE (*op, V2DImode); + } + else if (MEM_P (*op)) { rtx tmp = gen_reg_rtx (DImode); diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 71941d0..f16b42a 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -8645,6 +8645,23 @@ (clobber (reg:CC FLAGS_REG))])] "split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);") +(define_insn_and_split "*andndi3_doubleword" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (and:DI + (not:DI (match_operand:DI 1 "register_operand" "r,r")) + (match_operand:DI 2 "nonimmediate_operand" "r,m"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI && !TARGET_64BIT && TARGET_STV && TARGET_SSE" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) + (and:SI (not:SI (match_dup 1)) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 3) + (and:SI (not:SI (match_dup 4)) (match_dup 5))) + (clobber (reg:CC FLAGS_REG))])] + "split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);") + (define_insn "*<code>hi_1" [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,!k") (any_or:HI diff --git a/gcc/testsuite/gcc.target/i386/pr65105-5.c b/gcc/testsuite/gcc.target/i386/pr65105-5.c index 5818c1c..639bbe1 
100644 --- a/gcc/testsuite/gcc.target/i386/pr65105-5.c +++ b/gcc/testsuite/gcc.target/i386/pr65105-5.c @@ -1,7 +1,7 @@ /* PR target/pr65105 */ /* { dg-do compile { target { ia32 } } } */ /* { dg-options "-O2 -march=core-avx2" } */ -/* { dg-final { scan-assembler "pand" } } */ +/* { dg-final { scan-assembler "pandn" } } */ /* { dg-final { scan-assembler "pxor" } } */ /* { dg-final { scan-assembler "ptest" } } */