Message ID | 20100621171703.GA9361@intel.com |
---|---|
State | New |
Headers | show |
On Mon, Jun 21, 2010 at 7:17 PM, H.J. Lu <hongjiu.lu@intel.com> wrote: > When --with-cpu=atom is used to configure gcc, -mtune=atom is used by > default. I got > > FAIL: gcc.target/i386/amd64-abi-3.c scan-assembler subq[\\t ]*\\$88,[\\t ]*%rsp > > This is due to -mtune=atom generates: > > leaq -88(%rsp), %rsp > > instead of > > subq $88, %rsp > > also > > /export/build/gnu/gcc-atom/build-x86_64-linux/gcc/xgcc > -B/export/build/gnu/gcc-atom/build-x86_64-linux/gcc/ > /export/gnu/import/git/gcc/gcc/testsuite/gcc.target/i386/sse2-vec-2.c > -O2 > -msse2 -mtune=atom > In file included from > /export/gnu/import/git/gcc/gcc/testsuite/gcc.target/i386/sse2-vec-2.c:5:0: > /export/gnu/import/git/gcc/gcc/testsuite/gcc.target/i386/sse2-check.h: > In > function ‘do_test’: > /export/gnu/import/git/gcc/gcc/testsuite/gcc.target/i386/sse2-check.h:12:1: > internal compiler error: Segmentation fault > Please submit a full bug report, > with preprocessed source if appropriate. > See <http://gcc.gnu.org/bugs.html> for instructions. > > The problem is > > ;; if palignr or psrldq > (define_insn_reservation "atom_sseishft_2" 1 > (and (eq_attr "cpu" "atom") > (and (eq_attr "type" "sseishft") > (and (eq_attr "atom_unit" "sishuf") > (match_operand 2 "immediate_operand")))) > "atom-simple-0") > > ;; if reg/mem op > (define_insn_reservation "atom_sseishft_3" 2 > (and (eq_attr "cpu" "atom") > (and (eq_attr "type" "sseishft") > (not (match_operand 2 "immediate_operand")))) > "atom-complex, atom-all-eu") > > in atom.md. Some sseishft patterns, which generate psrldq, don't > have the 3rd operand as shift count. > > This patch fixes amd64-abi-3.c by adding -mtune=k8 to generate subq > instead of leaq. For sse2-vec-2.c, I added a new type for psrldq. > There is no need to check for psrldq when it is the only alternative. > OK for trunk if no regressions and 4.4/4.5 after a few days? > > Thanks. > > > H.J. > --- > gcc/ > > 2010-06-21 H.J. Lu <hongjiu.lu@intel.com> > > PR target/44615 > * config/i386/atom.md (atom_psrldq): New. > > * config/i386/i386.md (type): Add psrldq. > > * config/i386/ppro_insn (ppro_insn): Also check psrldq. > (ppro_insn_load): Likewise. > (ppro_insn_store): Likewise. > (ppro_insn_both): Likewise. > > * config/i386/sse.md (avx_lshrv1ti3): Replace sseishft with > psrldq for type. > (sse2_lshrv1ti3): Likewise. > (*vec_extractv2di_1_rex64_avx): Likewise. > (*vec_extractv2di_1_avx): Likewise. > (*vec_extractv2di_1_rex64): Replace sseishft with psrldq for > type. Remove atom_unit. > (*vec_extractv2di_1_sse2): Likewise. > > gcc/testsuite/ > > 2010-06-21 H.J. Lu <hongjiu.lu@intel.com> > > PR target/44615 > > * gcc.target/i386/amd64-abi-3.c: Add -mtune=k8. > > * gcc.target/i386/sse2-vec-2a.c: New. Please split out ABI testsuite patch to another patch. These two changes are unrelated. Comments below. > diff --git a/gcc/config/i386/atom.md b/gcc/config/i386/atom.md > index a9c4c5d..4c5d1e0 100644 > --- a/gcc/config/i386/atom.md > +++ b/gcc/config/i386/atom.md > @@ -513,6 +513,12 @@ > (not (match_operand 2 "immediate_operand")))) > "atom-complex, atom-all-eu") > > +;; psrldq > +(define_insn_reservation "atom_psrldq" 1 > + (and (eq_attr "cpu" "atom") > + (eq_attr "type" "psrldq")) > + "atom-simple-0") No need for new define_insn_reservation, please merge this functionality into existing atom_sseishft_2. Also, please rename new psrldq type to sseishft1, this is more consistent with existing naming scheme.. Uros.
On Mon, Jun 21, 2010 at 8:38 PM, H.J. Lu <hjl.tools@gmail.com> wrote: > On Mon, Jun 21, 2010 at 11:01 AM, Uros Bizjak <ubizjak@gmail.com> wrote: >> >> Please split out ABI testsuite patch to another patch. These two >> changes are unrelated. > > Here is a patch for gcc.target/i386/amd64-abi-3.c: > > 2010-06-21 H.J. Lu <hongjiu.lu@intel.com> > > PR target/44615 > * gcc.target/i386/amd64-abi-3.c: Add -mtune=k8. > > diff --git a/gcc/testsuite/gcc.target/i386/amd64-abi-3.c > b/gcc/testsuite/gcc.target/i386/amd64-abi-3.c > index 8db7f13..6b7bf6a 100644 > --- a/gcc/testsuite/gcc.target/i386/amd64-abi-3.c > +++ b/gcc/testsuite/gcc.target/i386/amd64-abi-3.c > @@ -1,6 +1,6 @@ > /* { dg-do compile } */ > /* { dg-require-effective-target lp64 } */ > -/* { dg-options "-O2 -fomit-frame-pointer -mno-sse" } */ > +/* { dg-options "-O2 -fomit-frame-pointer -mno-sse -mtune=k8" } */ > /* { dg-final { scan-assembler "subq\[\\t \]*\\\$88,\[\\t \]*%rsp" } } */ > /* { dg-final { scan-assembler-not "subq\[\\t \]*\\\$216,\[\\t \]*%rsp" } } */ > > OK for trunk, 4.4 and 4.5? OK everywhere. Thanks, Uros.
On Mon, Jun 21, 2010 at 8:47 PM, H.J. Lu <hjl.tools@gmail.com> wrote: >>> When --with-cpu=atom is used to configure gcc, -mtune=atom is used by >>> default. I got >>> >>> FAIL: gcc.target/i386/amd64-abi-3.c scan-assembler subq[\\t ]*\\$88,[\\t ]*%rsp >>> >>> This is due to -mtune=atom generates: >>> >>> leaq -88(%rsp), %rsp >>> >>> instead of >>> >>> subq $88, %rsp >>> >>> also >>> >>> /export/build/gnu/gcc-atom/build-x86_64-linux/gcc/xgcc >>> -B/export/build/gnu/gcc-atom/build-x86_64-linux/gcc/ >>> /export/gnu/import/git/gcc/gcc/testsuite/gcc.target/i386/sse2-vec-2.c >>> -O2 >>> -msse2 -mtune=atom >>> In file included from >>> /export/gnu/import/git/gcc/gcc/testsuite/gcc.target/i386/sse2-vec-2.c:5:0: >>> /export/gnu/import/git/gcc/gcc/testsuite/gcc.target/i386/sse2-check.h: >>> In >>> function ‘do_test’: >>> /export/gnu/import/git/gcc/gcc/testsuite/gcc.target/i386/sse2-check.h:12:1: >>> internal compiler error: Segmentation fault >>> Please submit a full bug report, >>> with preprocessed source if appropriate. >>> See <http://gcc.gnu.org/bugs.html> for instructions. >>> >>> The problem is >>> >>> ;; if palignr or psrldq >>> (define_insn_reservation "atom_sseishft_2" 1 >>> (and (eq_attr "cpu" "atom") >>> (and (eq_attr "type" "sseishft") >>> (and (eq_attr "atom_unit" "sishuf") >>> (match_operand 2 "immediate_operand")))) >>> "atom-simple-0") >>> >>> ;; if reg/mem op >>> (define_insn_reservation "atom_sseishft_3" 2 >>> (and (eq_attr "cpu" "atom") >>> (and (eq_attr "type" "sseishft") >>> (not (match_operand 2 "immediate_operand")))) >>> "atom-complex, atom-all-eu") >>> >>> in atom.md. Some sseishft patterns, which generate psrldq, don't >>> have the 3rd operand as shift count. >>> >>> This patch fixes amd64-abi-3.c by adding -mtune=k8 to generate subq >>> instead of leaq. For sse2-vec-2.c, I added a new type for psrldq. >>> There is no need to check for psrldq when it is the only alternative. >>> OK for trunk if no regressions and 4.4/4.5 after a few days? >>> > >> No need for new define_insn_reservation, please merge this >> functionality into existing atom_sseishft_2. >> >> Also, please rename new psrldq type to sseishft1, this is more >> consistent with existing naming scheme.. >> >> Uros. >> > > Here is the updated patch. OK for trunk and 4.5? > > Thanks. > > -- > H.J. > --- > gcc/ > > 2010-06-21 H.J. Lu <hongjiu.lu@intel.com> > > PR target/44615 > * config/i386/atom.md (atom_sseishft_2): Also check sseishft1. > > * config/i386/i386.md (type): Add sseishft1 > > * config/i386/ppro_insn (ppro_insn): Also check sseishft1. > (ppro_insn_load): Likewise. > (ppro_insn_store): Likewise. > (ppro_insn_both): Likewise. > > * config/i386/sse.md (avx_lshrv1ti3): Replace sseishft with > sseishft1 for type. > (sse2_lshrv1ti3): Likewise. No need to change above two patterns, they do have operand 2. OK with the change mentioned above. Thanks, Uros.
On Mon, Jun 21, 2010 at 11:54 AM, Uros Bizjak <ubizjak@gmail.com> wrote: > On Mon, Jun 21, 2010 at 8:47 PM, H.J. Lu <hjl.tools@gmail.com> wrote: > >>>> When --with-cpu=atom is used to configure gcc, -mtune=atom is used by >>>> default. I got >>>> >>>> FAIL: gcc.target/i386/amd64-abi-3.c scan-assembler subq[\\t ]*\\$88,[\\t ]*%rsp >>>> >>>> This is due to -mtune=atom generates: >>>> >>>> leaq -88(%rsp), %rsp >>>> >>>> instead of >>>> >>>> subq $88, %rsp >>>> >>>> also >>>> >>>> /export/build/gnu/gcc-atom/build-x86_64-linux/gcc/xgcc >>>> -B/export/build/gnu/gcc-atom/build-x86_64-linux/gcc/ >>>> /export/gnu/import/git/gcc/gcc/testsuite/gcc.target/i386/sse2-vec-2.c >>>> -O2 >>>> -msse2 -mtune=atom >>>> In file included from >>>> /export/gnu/import/git/gcc/gcc/testsuite/gcc.target/i386/sse2-vec-2.c:5:0: >>>> /export/gnu/import/git/gcc/gcc/testsuite/gcc.target/i386/sse2-check.h: >>>> In >>>> function ‘do_test’: >>>> /export/gnu/import/git/gcc/gcc/testsuite/gcc.target/i386/sse2-check.h:12:1: >>>> internal compiler error: Segmentation fault >>>> Please submit a full bug report, >>>> with preprocessed source if appropriate. >>>> See <http://gcc.gnu.org/bugs.html> for instructions. >>>> >>>> The problem is >>>> >>>> ;; if palignr or psrldq >>>> (define_insn_reservation "atom_sseishft_2" 1 >>>> (and (eq_attr "cpu" "atom") >>>> (and (eq_attr "type" "sseishft") >>>> (and (eq_attr "atom_unit" "sishuf") >>>> (match_operand 2 "immediate_operand")))) >>>> "atom-simple-0") >>>> >>>> ;; if reg/mem op >>>> (define_insn_reservation "atom_sseishft_3" 2 >>>> (and (eq_attr "cpu" "atom") >>>> (and (eq_attr "type" "sseishft") >>>> (not (match_operand 2 "immediate_operand")))) >>>> "atom-complex, atom-all-eu") >>>> >>>> in atom.md. Some sseishft patterns, which generate psrldq, don't >>>> have the 3rd operand as shift count. >>>> >>>> This patch fixes amd64-abi-3.c by adding -mtune=k8 to generate subq >>>> instead of leaq. For sse2-vec-2.c, I added a new type for psrldq. >>>> There is no need to check for psrldq when it is the only alternative. >>>> OK for trunk if no regressions and 4.4/4.5 after a few days? >>>> >> >>> No need for new define_insn_reservation, please merge this >>> functionality into existing atom_sseishft_2. >>> >>> Also, please rename new psrldq type to sseishft1, this is more >>> consistent with existing naming scheme.. >>> >>> Uros. >>> >> >> Here is the updated patch. OK for trunk and 4.5? >> >> Thanks. >> >> -- >> H.J. >> --- >> gcc/ >> >> 2010-06-21 H.J. Lu <hongjiu.lu@intel.com> >> >> PR target/44615 >> * config/i386/atom.md (atom_sseishft_2): Also check sseishft1. >> >> * config/i386/i386.md (type): Add sseishft1 >> >> * config/i386/ppro_insn (ppro_insn): Also check sseishft1. >> (ppro_insn_load): Likewise. >> (ppro_insn_store): Likewise. >> (ppro_insn_both): Likewise. >> >> * config/i386/sse.md (avx_lshrv1ti3): Replace sseishft with >> sseishft1 for type. >> (sse2_lshrv1ti3): Likewise. > > No need to change above two patterns, they do have operand 2. > The condition is ;; if palignr or psrldq (define_insn_reservation "atom_sseishft_2" 1 (and (eq_attr "cpu" "atom") (ior (eq_attr "type" "sseishft1") (and (eq_attr "type" "sseishft") (and (eq_attr "atom_unit" "sishuf") (match_operand 2 "immediate_operand"))))) "atom-simple-0") XXX_shrv1ti3 don't have atom_unit. I can either add atom_unit or use sseishft1 I choose sseishft1 for consistency. I can set atom_unit to sishuf if it is preferred. Thanks.
On Mon, Jun 21, 2010 at 9:38 PM, H.J. Lu <hjl.tools@gmail.com> wrote: >>> 2010-06-21 H.J. Lu <hongjiu.lu@intel.com> >>> >>> PR target/44615 >>> * config/i386/atom.md (atom_sseishft_2): Also check sseishft1. >>> >>> * config/i386/i386.md (type): Add sseishft1 >>> >>> * config/i386/ppro_insn (ppro_insn): Also check sseishft1. >>> (ppro_insn_load): Likewise. >>> (ppro_insn_store): Likewise. >>> (ppro_insn_both): Likewise. >>> >>> * config/i386/sse.md (avx_lshrv1ti3): Replace sseishft with >>> sseishft1 for type. >>> (sse2_lshrv1ti3): Likewise. >> >> No need to change above two patterns, they do have operand 2. >> > > The condition is > > ;; if palignr or psrldq > (define_insn_reservation "atom_sseishft_2" 1 > (and (eq_attr "cpu" "atom") > (ior (eq_attr "type" "sseishft1") > (and (eq_attr "type" "sseishft") > (and (eq_attr "atom_unit" "sishuf") > (match_operand 2 "immediate_operand"))))) > "atom-simple-0") > > XXX_shrv1ti3 don't have atom_unit. I can either add atom_unit > or use sseishft1 I choose sseishft1 for consistency. I can set > atom_unit to sishuf if it is preferred. Yes, please set atom_unit to sishuf. Let's leave sseishft1 type to "shift-with-only-one-operand" type of insns. Thanks, Uros.
diff --git a/gcc/config/i386/atom.md b/gcc/config/i386/atom.md index a9c4c5d..4c5d1e0 100644 --- a/gcc/config/i386/atom.md +++ b/gcc/config/i386/atom.md @@ -513,6 +513,12 @@ (not (match_operand 2 "immediate_operand")))) "atom-complex, atom-all-eu") +;; psrldq +(define_insn_reservation "atom_psrldq" 1 + (and (eq_attr "cpu" "atom") + (eq_attr "type" "psrldq")) + "atom-simple-0") + (define_insn_reservation "atom_sseimul" 1 (and (eq_attr "cpu" "atom") (eq_attr "type" "sseimul")) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 777f8e7..a336362 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -357,7 +357,7 @@ push,pop,call,callv,leave, str,bitmanip, fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint, - sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul, + sselog,sselog1,sseiadd,sseiadd1,sseishft,psrldq,sseimul, sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins, ssemuladd,sse4arg,lwp, mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" diff --git a/gcc/config/i386/ppro.md b/gcc/config/i386/ppro.md index 5e163d8..dcd9f1c 100644 --- a/gcc/config/i386/ppro.md +++ b/gcc/config/i386/ppro.md @@ -731,7 +731,7 @@ (define_insn_reservation "ppro_insn" 1 (and (eq_attr "cpu" "pentiumpro") (and (eq_attr "memory" "none,unknown") - (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp"))) + (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,psrldq,sseimul,mmx,mmxadd,mmxcmp"))) "decodern,(p0|p1)") ;; read-modify and register-memory instructions have 2 or three uops, @@ -739,13 +739,13 @@ (define_insn_reservation "ppro_insn_load" 3 (and (eq_attr "cpu" "pentiumpro") (and (eq_attr "memory" "load") - (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp"))) + (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,psrldq,sseimul,mmx,mmxadd,mmxcmp"))) "decoder0,p2+(p0|p1)") (define_insn_reservation "ppro_insn_store" 1 (and (eq_attr "cpu" "pentiumpro") (and (eq_attr "memory" "store") - (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp"))) + (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,psrldq,sseimul,mmx,mmxadd,mmxcmp"))) "decoder0,(p0|p1),p4+p3") ;; read-modify-store instructions produce 4 uops so they have to be @@ -753,6 +753,6 @@ (define_insn_reservation "ppro_insn_both" 4 (and (eq_attr "cpu" "pentiumpro") (and (eq_attr "memory" "both") - (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp"))) + (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,psrldq,sseimul,mmx,mmxadd,mmxcmp"))) "decoder0,p2+(p0|p1),p4+p3") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b4046b8..def8843 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -5934,7 +5934,7 @@ operands[2] = GEN_INT (INTVAL (operands[2]) / 8); return "vpsrldq\t{%2, %1, %0|%0, %1, %2}"; } - [(set_attr "type" "sseishft") + [(set_attr "type" "psrldq") (set_attr "prefix" "vex") (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) @@ -5964,7 +5964,7 @@ operands[2] = GEN_INT (INTVAL (operands[2]) / 8); return "psrldq\t{%2, %0|%0, %2}"; } - [(set_attr "type" "sseishft") + [(set_attr "type" "psrldq") (set_attr "prefix_data16" "1") (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) @@ -7397,7 +7397,7 @@ vpsrldq\t{$8, %1, %0|%0, %1, 8} vmovq\t{%H1, %0|%0, %H1} vmov{q}\t{%H1, %0|%0, %H1}" - [(set_attr "type" "ssemov,sseishft,ssemov,imov") + [(set_attr "type" "ssemov,psrldq,ssemov,imov") (set_attr "length_immediate" "*,1,*,*") (set_attr "memory" "*,none,*,*") (set_attr "prefix" "vex") @@ -7414,9 +7414,8 @@ psrldq\t{$8, %0|%0, 8} movq\t{%H1, %0|%0, %H1} mov{q}\t{%H1, %0|%0, %H1}" - [(set_attr "type" "ssemov,sseishft,ssemov,imov") + [(set_attr "type" "ssemov,psrldq,ssemov,imov") (set_attr "length_immediate" "*,1,*,*") - (set_attr "atom_unit" "*,sishuf,*,*") (set_attr "memory" "*,none,*,*") (set_attr "mode" "V2SF,TI,TI,DI")]) @@ -7432,7 +7431,7 @@ vmovhps\t{%1, %0|%0, %1} vpsrldq\t{$8, %1, %0|%0, %1, 8} vmovq\t{%H1, %0|%0, %H1}" - [(set_attr "type" "ssemov,sseishft,ssemov") + [(set_attr "type" "ssemov,psrldq,ssemov") (set_attr "length_immediate" "*,1,*") (set_attr "memory" "*,none,*") (set_attr "prefix" "vex") @@ -7449,9 +7448,8 @@ movhps\t{%1, %0|%0, %1} psrldq\t{$8, %0|%0, 8} movq\t{%H1, %0|%0, %H1}" - [(set_attr "type" "ssemov,sseishft,ssemov") + [(set_attr "type" "ssemov,psrldq,ssemov") (set_attr "length_immediate" "*,1,*") - (set_attr "atom_unit" "*,sishuf,*") (set_attr "memory" "*,none,*") (set_attr "mode" "V2SF,TI,TI")]) diff --git a/gcc/testsuite/gcc.target/i386/amd64-abi-3.c b/gcc/testsuite/gcc.target/i386/amd64-abi-3.c index 8db7f13..6b7bf6a 100644 --- a/gcc/testsuite/gcc.target/i386/amd64-abi-3.c +++ b/gcc/testsuite/gcc.target/i386/amd64-abi-3.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-require-effective-target lp64 } */ -/* { dg-options "-O2 -fomit-frame-pointer -mno-sse" } */ +/* { dg-options "-O2 -fomit-frame-pointer -mno-sse -mtune=k8" } */ /* { dg-final { scan-assembler "subq\[\\t \]*\\\$88,\[\\t \]*%rsp" } } */ /* { dg-final { scan-assembler-not "subq\[\\t \]*\\\$216,\[\\t \]*%rsp" } } */ --- /dev/null 2010-06-16 08:44:14.675854310 -0700 +++ gcc/gcc/testsuite/gcc.target/i386/sse2-vec-2a.c 2010-06-21 09:58:10.021217578 -0700 @@ -0,0 +1,5 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -msse2 -mtune=atom" } */ +/* { dg-require-effective-target sse2 } */ + +#include "sse2-vec-2.c"