Message ID | 20180125100921.GV20622@bubble.grove.modra.org |
---|---|
State | New |
Headers | show |
Series | PR84033, powerpc64le -moptimize-swaps bad code with vec_vbpermq | expand |
On Jan 25, 2018, at 4:09 AM, Alan Modra <amodra@gmail.com> wrote: > > vbpermq produces its output in bits 48..63 of the target vector reg, > so the output cannot be lane swapped. Bootstrapped and regression > tested powerpc64le-linux. OK to apply mainline, and backport to the > branches? I can't approve, but FWIW, I agree with this patch. Bill > > gcc/ > PR target/84033 > * config/rs6000/rs6000-p8swap.c (rtx_is_swappable_p): Exclude > UNSPEC_VBPERMQ. Sort other unspecs. > gcc/testsuite/ > PR target/84033 > * gcc.target/powerpc/swaps-p8-46.c: New. > > diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c > index 1f95290..ffcbba9 100644 > --- a/gcc/config/rs6000/rs6000-p8swap.c > +++ b/gcc/config/rs6000/rs6000-p8swap.c > @@ -741,6 +741,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special) > { > default: > break; > + case UNSPEC_VBPERMQ: > case UNSPEC_VMRGH_DIRECT: > case UNSPEC_VMRGL_DIRECT: > case UNSPEC_VPACK_SIGN_SIGN_SAT: > @@ -762,8 +763,14 @@ rtx_is_swappable_p (rtx op, unsigned int *special) > case UNSPEC_VSUMSWS: > case UNSPEC_VSUMSWS_DIRECT: > case UNSPEC_VSX_CONCAT: > + case UNSPEC_VSX_CVDPSPN: > + case UNSPEC_VSX_CVSPDP: > + case UNSPEC_VSX_CVSPDPN: > + case UNSPEC_VSX_EXTRACT: > case UNSPEC_VSX_SET: > case UNSPEC_VSX_SLDWI: > + case UNSPEC_VSX_VEC_INIT: > + case UNSPEC_VSX_VSLO: > case UNSPEC_VUNPACK_HI_SIGN: > case UNSPEC_VUNPACK_HI_SIGN_DIRECT: > case UNSPEC_VUNPACK_LO_SIGN: > @@ -774,12 +781,6 @@ rtx_is_swappable_p (rtx op, unsigned int *special) > case UNSPEC_VUPKLPX: > case UNSPEC_VUPKLS_V4SF: > case UNSPEC_VUPKLU_V4SF: > - case UNSPEC_VSX_CVDPSPN: > - case UNSPEC_VSX_CVSPDP: > - case UNSPEC_VSX_CVSPDPN: > - case UNSPEC_VSX_EXTRACT: > - case UNSPEC_VSX_VSLO: > - case UNSPEC_VSX_VEC_INIT: > return 0; > case UNSPEC_VSPLT_DIRECT: > case UNSPEC_VSX_XXSPLTD: > diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-46.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-46.c > new file mode 100644 > index 0000000..23494b6 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-46.c > @@ -0,0 +1,34 @@ > +/* { dg-do run { target { powerpc64le-*-* } } } */ > +/* { dg-require-effective-target powerpc_p8vector_ok } */ > +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ > +/* { dg-options "-mcpu=power8 -O2 " } */ > + > +typedef __attribute__ ((__aligned__ (8))) unsigned long long __m64; > +typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); > + > +/* PR84033. Extracted from xmmintrin.h but with a pointer param to > + allow swaps to happen when not inline. */ > +int __attribute__ ((__noinline__)) > +_mm_movemask_ps (__m128 *__A) > +{ > + __vector __m64 result; > + static const __vector unsigned int perm_mask = > + { > + 0x00204060, 0x80808080, 0x80808080, 0x80808080 > + }; > + > + result = (__vector __m64) > + __builtin_vec_vbpermq ((__vector unsigned char) (*__A), > + (__vector unsigned char) perm_mask); > + return result[1]; > +} > + > +int > +main (void) > +{ > + union { unsigned int i[4]; __m128 m; } x > + = { 0x80000000, 0x80000000, 0x7fffffff, 0x7fffffff }; > + if (_mm_movemask_ps (&x.m) != 3) > + __builtin_abort (); > + return 0; > +} > > -- > Alan Modra > Australia Development Lab, IBM >
On Thu, Jan 25, 2018 at 08:39:21PM +1030, Alan Modra wrote: > vbpermq produces its output in bits 48..63 of the target vector reg, > so the output cannot be lane swapped. Bootstrapped and regression > tested powerpc64le-linux. OK to apply mainline, and backport to the > branches? > > gcc/ > PR target/84033 > * config/rs6000/rs6000-p8swap.c (rtx_is_swappable_p): Exclude > UNSPEC_VBPERMQ. Sort other unspecs. > gcc/testsuite/ > PR target/84033 > * gcc.target/powerpc/swaps-p8-46.c: New. > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-46.c > @@ -0,0 +1,34 @@ > +/* { dg-do run { target { powerpc64le-*-* } } } */ /* { dg-do run { target { lp64 } } } */ would be better, since you only care whether you are running 64-bit (and the target triple does not tell you; although standard configs for powerpc64le pretty much enforce it, of course). Okay for trunk with or without that change; okay for the branches after looking for fallout. Thanks! Segher
diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c index 1f95290..ffcbba9 100644 --- a/gcc/config/rs6000/rs6000-p8swap.c +++ b/gcc/config/rs6000/rs6000-p8swap.c @@ -741,6 +741,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special) { default: break; + case UNSPEC_VBPERMQ: case UNSPEC_VMRGH_DIRECT: case UNSPEC_VMRGL_DIRECT: case UNSPEC_VPACK_SIGN_SIGN_SAT: @@ -762,8 +763,14 @@ rtx_is_swappable_p (rtx op, unsigned int *special) case UNSPEC_VSUMSWS: case UNSPEC_VSUMSWS_DIRECT: case UNSPEC_VSX_CONCAT: + case UNSPEC_VSX_CVDPSPN: + case UNSPEC_VSX_CVSPDP: + case UNSPEC_VSX_CVSPDPN: + case UNSPEC_VSX_EXTRACT: case UNSPEC_VSX_SET: case UNSPEC_VSX_SLDWI: + case UNSPEC_VSX_VEC_INIT: + case UNSPEC_VSX_VSLO: case UNSPEC_VUNPACK_HI_SIGN: case UNSPEC_VUNPACK_HI_SIGN_DIRECT: case UNSPEC_VUNPACK_LO_SIGN: @@ -774,12 +781,6 @@ rtx_is_swappable_p (rtx op, unsigned int *special) case UNSPEC_VUPKLPX: case UNSPEC_VUPKLS_V4SF: case UNSPEC_VUPKLU_V4SF: - case UNSPEC_VSX_CVDPSPN: - case UNSPEC_VSX_CVSPDP: - case UNSPEC_VSX_CVSPDPN: - case UNSPEC_VSX_EXTRACT: - case UNSPEC_VSX_VSLO: - case UNSPEC_VSX_VEC_INIT: return 0; case UNSPEC_VSPLT_DIRECT: case UNSPEC_VSX_XXSPLTD: diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-46.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-46.c new file mode 100644 index 0000000..23494b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-46.c @@ -0,0 +1,34 @@ +/* { dg-do run { target { powerpc64le-*-* } } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-options "-mcpu=power8 -O2 " } */ + +typedef __attribute__ ((__aligned__ (8))) unsigned long long __m64; +typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); + +/* PR84033. Extracted from xmmintrin.h but with a pointer param to + allow swaps to happen when not inline. */ +int __attribute__ ((__noinline__)) +_mm_movemask_ps (__m128 *__A) +{ + __vector __m64 result; + static const __vector unsigned int perm_mask = + { + 0x00204060, 0x80808080, 0x80808080, 0x80808080 + }; + + result = (__vector __m64) + __builtin_vec_vbpermq ((__vector unsigned char) (*__A), + (__vector unsigned char) perm_mask); + return result[1]; +} + +int +main (void) +{ + union { unsigned int i[4]; __m128 m; } x + = { 0x80000000, 0x80000000, 0x7fffffff, 0x7fffffff }; + if (_mm_movemask_ps (&x.m) != 3) + __builtin_abort (); + return 0; +}