Message ID | 20170412114351.4nlvkanp3mxy465t@virgil.suse.cz |
---|---|
State | New |
Headers | show |
On Wed, 12 Apr 2017, Martin Jambor wrote: > Hi, > > the patch below is an attempt to deal with PR 80293 as non-invasively > as possible. Basically, it switches off total SRA scalarization of > any local aggregates which contains an array of elements that have one > byte (or less). > > The logic behind this is that accessing such arrays element-wise > usually results in poor code and that such char arrays are often used > for non-statically-typed content anyway, and we do not want to copy > that byte per byte. > > Alan, do you think this could impact your constant pool scalarization > too severely? Hmm, isn't one of the issues that we have if (VAR_P (var) && scalarizable_type_p (TREE_TYPE (var))) { if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (var))) <= max_scalarization_size) { create_total_scalarization_access (var); which limits the size of scalarizable vars but not the number of accesses we create for total scalarization? Is scalarizable_type_p only used in contexts where we have no hint of the actual accesses? That is, for the constant pool case we usually have x = .LC0; .. = x[2]; so we have a "hint" that accesses on x are those we'd want to optimize to accesses to .LC0. If we have no accesses on x then we can as well scalarize using word_mode for example? > Richi, if you or Alan does not object in a few days, I'd like to > commit this in time for gcc7. It has passed bootstrap and testing on > x86_64-linux (but the constant pool SRA work was aimed primarily at > ARM). Maybe we can -- if this is the case here -- not completely scalarize in case we don't know how the destination of the aggregate copy is used? > Thanks, > > Martin > > > 2017-04-10 Martin Jambor <mjambor@suse.cz> > > * tree-sra.c (scalarizable_type_p): Make char arrays not totally > scalarizable. > > testsuite/ > * g++.dg/tree-ssa/pr80293.C: New test. > --- > gcc/testsuite/g++.dg/tree-ssa/pr80293.C | 45 +++++++++++++++++++++++++++++++++ > gcc/tree-sra.c | 2 +- > 2 files changed, 46 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr80293.C > > diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr80293.C b/gcc/testsuite/g++.dg/tree-ssa/pr80293.C > new file mode 100644 > index 00000000000..7faf35ae983 > --- /dev/null > +++ b/gcc/testsuite/g++.dg/tree-ssa/pr80293.C > @@ -0,0 +1,45 @@ > +// { dg-do compile } > +// { dg-options "-O2 -std=gnu++11 -fdump-tree-optimized" } */ > + > +#include <array> > + > +// Return a copy of the underlying memory of an arbitrary value. > +template < > + typename T, > + typename = typename std::enable_if<std::is_trivially_copyable<T>::value>::type > +> > +auto getMem( > + T const & value > +) -> std::array<char, sizeof(T)> { > + auto ret = std::array<char, sizeof(T)>{}; > + __builtin_memcpy(ret.data(), &value, sizeof(T)); > + return ret; > +} > + > +template < > + typename T, > + typename = typename std::enable_if<std::is_trivially_copyable<T>::value>::type > +> > +auto fromMem( > + std::array<char, sizeof(T)> const & buf > +) -> T { > + auto ret = T{}; > + __builtin_memcpy(&ret, buf.data(), sizeof(T)); > + return ret; > +} > + > +double foo1(std::uint64_t arg) { > + return fromMem<double>(getMem(arg)); > +} > + > +double foo2(std::uint64_t arg) { > + return *reinterpret_cast<double*>(&arg); > +} > + > +double foo3(std::uint64_t arg) { > + double ret; > + __builtin_memcpy(&ret, &arg, sizeof(arg)); > + return ret; > +} > + > +// { dg-final { scan-tree-dump-not "BIT_FIELD_REF" "optimized" } } > diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c > index 02453d3ed9a..cbe9e862a2f 100644 > --- a/gcc/tree-sra.c > +++ b/gcc/tree-sra.c > @@ -981,7 +981,7 @@ scalarizable_type_p (tree type) > if (TYPE_DOMAIN (type) == NULL_TREE > || !tree_fits_shwi_p (TYPE_SIZE (type)) > || !tree_fits_shwi_p (TYPE_SIZE (TREE_TYPE (type))) > - || (tree_to_shwi (TYPE_SIZE (TREE_TYPE (type))) <= 0) > + || (tree_to_shwi (TYPE_SIZE (TREE_TYPE (type))) <= BITS_PER_UNIT) > || !tree_fits_shwi_p (TYPE_MIN_VALUE (TYPE_DOMAIN (type)))) > return false; > if (tree_to_shwi (TYPE_SIZE (type)) == 0 >
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr80293.C b/gcc/testsuite/g++.dg/tree-ssa/pr80293.C new file mode 100644 index 00000000000..7faf35ae983 --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/pr80293.C @@ -0,0 +1,45 @@ +// { dg-do compile } +// { dg-options "-O2 -std=gnu++11 -fdump-tree-optimized" } */ + +#include <array> + +// Return a copy of the underlying memory of an arbitrary value. +template < + typename T, + typename = typename std::enable_if<std::is_trivially_copyable<T>::value>::type +> +auto getMem( + T const & value +) -> std::array<char, sizeof(T)> { + auto ret = std::array<char, sizeof(T)>{}; + __builtin_memcpy(ret.data(), &value, sizeof(T)); + return ret; +} + +template < + typename T, + typename = typename std::enable_if<std::is_trivially_copyable<T>::value>::type +> +auto fromMem( + std::array<char, sizeof(T)> const & buf +) -> T { + auto ret = T{}; + __builtin_memcpy(&ret, buf.data(), sizeof(T)); + return ret; +} + +double foo1(std::uint64_t arg) { + return fromMem<double>(getMem(arg)); +} + +double foo2(std::uint64_t arg) { + return *reinterpret_cast<double*>(&arg); +} + +double foo3(std::uint64_t arg) { + double ret; + __builtin_memcpy(&ret, &arg, sizeof(arg)); + return ret; +} + +// { dg-final { scan-tree-dump-not "BIT_FIELD_REF" "optimized" } } diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c index 02453d3ed9a..cbe9e862a2f 100644 --- a/gcc/tree-sra.c +++ b/gcc/tree-sra.c @@ -981,7 +981,7 @@ scalarizable_type_p (tree type) if (TYPE_DOMAIN (type) == NULL_TREE || !tree_fits_shwi_p (TYPE_SIZE (type)) || !tree_fits_shwi_p (TYPE_SIZE (TREE_TYPE (type))) - || (tree_to_shwi (TYPE_SIZE (TREE_TYPE (type))) <= 0) + || (tree_to_shwi (TYPE_SIZE (TREE_TYPE (type))) <= BITS_PER_UNIT) || !tree_fits_shwi_p (TYPE_MIN_VALUE (TYPE_DOMAIN (type)))) return false; if (tree_to_shwi (TYPE_SIZE (type)) == 0