Message ID | 20100720133034.GA24667@virgil.arch.suse.de |
---|---|
State | New |
Headers | show |
On Tue, 20 Jul 2010, Martin Jambor wrote: > Hi, > > the simple patch below fixes PR 44900. Apparently we rarely happen to > land in the SRA_UDH_LEFT refreshing mode in > load_assign_lhs_subreplacements because cleaning up after it has not > worked for a long time. But it does require a special combination of > assignments between special unions to trigger, such as the one in the > testcase. > > This patch is aginst the 4.5 branch but the one for trunk is exactly > the same. I have bootstrapped and regression tested both without any > problems. OK for trunk and the branch? I belive you can remove test_assert (and thus the printf decl and the test macro) from the testcase. Ok with that change. Thanks, RIchard. > Thanks, > > Martin > > > > 2010-07-20 Martin Jambor <mjambor@suse.cz> > > PR tree-optimization/44900 > * tree-sra.c (load_assign_lhs_subreplacements): Updated comments. > (sra_modify_assign): Move gsi to the next statmenent unconditionally. > > Index: gcc-4_5-branch/gcc/testsuite/g++.dg/torture/pr44900.C > =================================================================== > --- /dev/null > +++ gcc-4_5-branch/gcc/testsuite/g++.dg/torture/pr44900.C > @@ -0,0 +1,91 @@ > +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ > +/* { dg-options "-msse" } */ > +/* { dg-require-effective-target sse } */ > + > +typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); > +typedef float __v4sf __attribute__ ((__vector_size__ (16))); > + > +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, > +__artificial__)) > +_mm_set_ps (const float __Z, const float __Y, const float __X, const float __W) > +{ > + return __extension__ (__m128)(__v4sf){ __W, __X, __Y, __Z }; > +} > + > +struct vec > +{ > + union { > + __m128 v; > + float e[4]; > + }; > + > + static const vec & zero() > + { > + static const vec v = _mm_set_ps(0, 0, 0, 0); > + return v; > + } > + > + vec() {} > + vec(const __m128 & a) : v(a) {} > + > + operator const __m128&() const { return v; } > +}; > + > +struct vec2 > +{ > + vec _v1; > + vec _v2; > + > + vec2() {} > + vec2(const vec & a, const vec & b) : _v1(a), _v2(b) {} > + > + static vec2 load(const float * a) > + { > + return vec2( > + __builtin_ia32_loadups(&a[0]), > + __builtin_ia32_loadups(&a[4])); > + } > + > + const vec & v1() const { return _v1; } > + const vec & v2() const { return _v2; } > +}; > + > +extern "C" { > + int printf (const char*, ...); > + void abort(void); > +} > + > +inline bool test_assert( bool is_succeed, const char * file_name, int line_num > +) > +{ > + if ( !is_succeed ) > + { > + printf("error: %s(%d)\n", file_name, line_num); > + } > + > + return is_succeed; > +} > + > +inline bool operator==(const vec & a, const vec & b) > +{ return 0xf == __builtin_ia32_movmskps(__builtin_ia32_cmpeqps(a, b)); } > + > +#define test(x, y) test_assert( (x)==(y), __FILE__, __LINE__ ) > + > +int main( int argc, char * argv[] ) > +{ > + __attribute__((aligned(16))) float data[] = > + { 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5 }; > + > + float * p = &data[2]; > + vec2 a; > + > + a = vec2::load(p); > + > + vec v1 = a.v1(); > + vec v2 = a.v2(); > + > + if (v2.e[3] != 7.0) > + abort(); > + > + return 0; > +} > Index: gcc-4_5-branch/gcc/tree-sra.c > =================================================================== > --- gcc-4_5-branch.orig/gcc/tree-sra.c > +++ gcc-4_5-branch/gcc/tree-sra.c > @@ -2445,9 +2445,11 @@ handle_unscalarized_data_in_subtree (str > (sub)tree. If that is not possible, refresh the TOP_RACC base aggregate and > load the accesses from it. LEFT_OFFSET is the offset of the left whole > subtree being copied, RIGHT_OFFSET is the same thing for the right subtree. > - GSI is stmt iterator used for statement insertions. *REFRESHED is true iff > - the rhs top aggregate has already been refreshed by contents of its scalar > - reductions and is set to true if this function has to do it. */ > + NEW_GSI is stmt iterator used for statement insertions after the original > + assignment, OLD_GSI is used to insert statements before the assignment. > + *REFRESHED keeps the information whether we have needed to refresh > + replacements of the LHS and from which side of the assignments this takes > + place. */ > > static void > load_assign_lhs_subreplacements (struct access *lacc, struct access *top_racc, > @@ -2747,9 +2749,7 @@ sra_modify_assign (gimple *stmt, gimple_ > &orig_gsi, gsi, &refreshed, lhs); > if (refreshed != SRA_UDH_RIGHT) > { > - if (*stmt == gsi_stmt (*gsi)) > - gsi_next (gsi); > - > + gsi_next (gsi); > unlink_stmt_vdef (*stmt); > gsi_remove (&orig_gsi, true); > sra_stats.deleted++; > >
Index: gcc-4_5-branch/gcc/testsuite/g++.dg/torture/pr44900.C =================================================================== --- /dev/null +++ gcc-4_5-branch/gcc/testsuite/g++.dg/torture/pr44900.C @@ -0,0 +1,91 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-msse" } */ +/* { dg-require-effective-target sse } */ + +typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, +__artificial__)) +_mm_set_ps (const float __Z, const float __Y, const float __X, const float __W) +{ + return __extension__ (__m128)(__v4sf){ __W, __X, __Y, __Z }; +} + +struct vec +{ + union { + __m128 v; + float e[4]; + }; + + static const vec & zero() + { + static const vec v = _mm_set_ps(0, 0, 0, 0); + return v; + } + + vec() {} + vec(const __m128 & a) : v(a) {} + + operator const __m128&() const { return v; } +}; + +struct vec2 +{ + vec _v1; + vec _v2; + + vec2() {} + vec2(const vec & a, const vec & b) : _v1(a), _v2(b) {} + + static vec2 load(const float * a) + { + return vec2( + __builtin_ia32_loadups(&a[0]), + __builtin_ia32_loadups(&a[4])); + } + + const vec & v1() const { return _v1; } + const vec & v2() const { return _v2; } +}; + +extern "C" { + int printf (const char*, ...); + void abort(void); +} + +inline bool test_assert( bool is_succeed, const char * file_name, int line_num +) +{ + if ( !is_succeed ) + { + printf("error: %s(%d)\n", file_name, line_num); + } + + return is_succeed; +} + +inline bool operator==(const vec & a, const vec & b) +{ return 0xf == __builtin_ia32_movmskps(__builtin_ia32_cmpeqps(a, b)); } + +#define test(x, y) test_assert( (x)==(y), __FILE__, __LINE__ ) + +int main( int argc, char * argv[] ) +{ + __attribute__((aligned(16))) float data[] = + { 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5 }; + + float * p = &data[2]; + vec2 a; + + a = vec2::load(p); + + vec v1 = a.v1(); + vec v2 = a.v2(); + + if (v2.e[3] != 7.0) + abort(); + + return 0; +} Index: gcc-4_5-branch/gcc/tree-sra.c =================================================================== --- gcc-4_5-branch.orig/gcc/tree-sra.c +++ gcc-4_5-branch/gcc/tree-sra.c @@ -2445,9 +2445,11 @@ handle_unscalarized_data_in_subtree (str (sub)tree. If that is not possible, refresh the TOP_RACC base aggregate and load the accesses from it. LEFT_OFFSET is the offset of the left whole subtree being copied, RIGHT_OFFSET is the same thing for the right subtree. - GSI is stmt iterator used for statement insertions. *REFRESHED is true iff - the rhs top aggregate has already been refreshed by contents of its scalar - reductions and is set to true if this function has to do it. */ + NEW_GSI is stmt iterator used for statement insertions after the original + assignment, OLD_GSI is used to insert statements before the assignment. + *REFRESHED keeps the information whether we have needed to refresh + replacements of the LHS and from which side of the assignments this takes + place. */ static void load_assign_lhs_subreplacements (struct access *lacc, struct access *top_racc, @@ -2747,9 +2749,7 @@ sra_modify_assign (gimple *stmt, gimple_ &orig_gsi, gsi, &refreshed, lhs); if (refreshed != SRA_UDH_RIGHT) { - if (*stmt == gsi_stmt (*gsi)) - gsi_next (gsi); - + gsi_next (gsi); unlink_stmt_vdef (*stmt); gsi_remove (&orig_gsi, true); sra_stats.deleted++;