Message ID | alpine.LNX.2.00.1012011114140.23074@zhemvz.fhfr.qr |
---|---|
State | New |
Headers | show |
On Wed, Dec 1, 2010 at 04:21, Richard Guenther <rguenther@suse.de> wrote: > Comments inline (non-quoted patch - attachments suck). Sorry. This is the last time I will use gmail to send out a patch. > + return dr_analyze_innermost (dr) && > + (integer_zerop (size_binop (MINUS_EXPR, > + fold_convert (sizetype, DR_STEP (dr)), > + TYPE_SIZE_UNIT (TREE_TYPE (op0)))) > + || integer_zerop (size_binop (PLUS_EXPR, > + TYPE_SIZE_UNIT (TREE_TYPE (op0)), > + fold_convert (sizetype, DR_STEP (dr))))); > > You leak dr. && goes to the next line. Right. > You want > tree_int_cst_equal (fold_unary (ABS_EXPR, TREE_TYPE (DR_STEP (dr)), > DR_STEP (dr)), TYPE_SIZE_UNIT (...)) > I will replace this also in generate_memset_zero. > if this is the same test as in generate_memset_zero please put this > helper in tree-loop-distribution and use it there as well. The problem is that in generate_memset_zero we do something different in the branches: /* Test for a positive stride, iterating over every element. */ if (integer_zerop (size_binop (MINUS_EXPR, fold_convert (sizetype, DR_STEP (dr)), TYPE_SIZE_UNIT (TREE_TYPE (op0))))) { addr_base = fold_convert_loc (loc, sizetype, size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr))); addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (DR_BASE_ADDRESS (dr)), DR_BASE_ADDRESS (dr), addr_base); nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); } /* Test for a negative stride, iterating over every element. */ else if (integer_zerop (size_binop (PLUS_EXPR, TYPE_SIZE_UNIT (TREE_TYPE (op0)), fold_convert (sizetype, DR_STEP (dr))))) { nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr)); addr_base = fold_convert_loc (loc, sizetype, addr_base); addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base, fold_convert_loc (loc, sizetype, nb_bytes)); addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base, TYPE_SIZE_UNIT (TREE_TYPE (op0))); addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (DR_BASE_ADDRESS (dr)), DR_BASE_ADDRESS (dr), addr_base); }
On Wed, Dec 1, 2010 at 09:26, Sebastian Pop <sebpop@gmail.com> wrote: > On Wed, Dec 1, 2010 at 04:21, Richard Guenther <rguenther@suse.de> wrote: >> Comments inline (non-quoted patch - attachments suck). > > Sorry. This is the last time I will use gmail to send out a patch. > >> + return dr_analyze_innermost (dr) && >> + (integer_zerop (size_binop (MINUS_EXPR, >> + fold_convert (sizetype, DR_STEP (dr)), >> + TYPE_SIZE_UNIT (TREE_TYPE (op0)))) >> + || integer_zerop (size_binop (PLUS_EXPR, >> + TYPE_SIZE_UNIT (TREE_TYPE (op0)), >> + fold_convert (sizetype, DR_STEP (dr))))); >> >> You leak dr. && goes to the next line. > > Right. > >> You want >> tree_int_cst_equal (fold_unary (ABS_EXPR, TREE_TYPE (DR_STEP (dr)), >> DR_STEP (dr)), TYPE_SIZE_UNIT (...)) >> > > I will replace this also in generate_memset_zero. > >> if this is the same test as in generate_memset_zero please put this >> helper in tree-loop-distribution and use it there as well. > > The problem is that in generate_memset_zero we do something different > in the branches: Oh never mind this comment, I see what you meant before. Sebastian
On Wed, Dec 1, 2010 at 11:15, Sebastian Pop <sebpop@gmail.com> wrote: > Hi, > > here is the updated patch that addresses the review comments. > Ok for trunk after bootstrap and test? This patch passed bootstrap and test on amd64-linux. Sebastian > > Thanks, > Sebastian > > 2010-11-30 Sebastian Pop <sebastian.pop@amd.com> > > PR tree-optimization/45199 > * tree-data-ref.c (mem_write_stride_of_same_size_as_unit_type_p): New. > (stores_zero_from_loop): Call > mem_write_stride_of_same_size_as_unit_type_p. > * tree-data-ref.h (stride_of_unit_type_p): New. > * tree-loop-distribution.c (generate_memset_zero): Simplified. > Call stride_of_unit_type_p. > (build_rdg_partition_for_component): Do not call > rdg_flag_similar_memory_accesses when > flag_tree_loop_distribute_patterns is set. > > * gcc.dg/tree-ssa/ldist-15.c: New. > * gcc.dg/tree-ssa/ldist-16.c: New. > * gfortran.dg/ldist-pr45199.f: New. > --- > gcc/ChangeLog | 13 ++++++++ > gcc/testsuite/ChangeLog | 7 ++++ > gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c | 23 +++++++++++++++ > gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c | 21 ++++++++++++++ > gcc/testsuite/gfortran.dg/ldist-pr45199.f | 27 +++++++++++++++++ > gcc/tree-data-ref.c | 24 +++++++++++++++- > gcc/tree-data-ref.h | 11 +++++++ > gcc/tree-loop-distribution.c | 44 ++++++++++------------------- > 8 files changed, 140 insertions(+), 30 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c > create mode 100644 gcc/testsuite/gfortran.dg/ldist-pr45199.f > > diff --git a/gcc/ChangeLog b/gcc/ChangeLog > index 5706ef8..2881d58 100644 > --- a/gcc/ChangeLog > +++ b/gcc/ChangeLog > @@ -1,3 +1,16 @@ > +2010-11-30 Sebastian Pop <sebastian.pop@amd.com> > + > + PR tree-optimization/45199 > + * tree-data-ref.c (mem_write_stride_of_same_size_as_unit_type_p): New. > + (stores_zero_from_loop): Call > + mem_write_stride_of_same_size_as_unit_type_p. > + * tree-data-ref.h (stride_of_unit_type_p): New. > + * tree-loop-distribution.c (generate_memset_zero): Simplified. > + Call stride_of_unit_type_p. > + (build_rdg_partition_for_component): Do not call > + rdg_flag_similar_memory_accesses when > + flag_tree_loop_distribute_patterns is set. > + > 2010-11-29 H.J. Lu <hongjiu.lu@intel.com> > > PR driver/46712 > diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog > index 5792e66..20aaf04 100644 > --- a/gcc/testsuite/ChangeLog > +++ b/gcc/testsuite/ChangeLog > @@ -1,3 +1,10 @@ > +2010-11-30 Sebastian Pop <sebastian.pop@amd.com> > + > + PR tree-optimization/45199 > + * gcc.dg/tree-ssa/ldist-15.c: New. > + * gcc.dg/tree-ssa/ldist-16.c: New. > + * gfortran.dg/ldist-pr45199.f: New. > + > 2010-11-29 Nicola Pero <nicola.pero@meta-innovation.com> > > * objc.dg/duplicate-class-1.m: New. > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c > new file mode 100644 > index 0000000..7ce3b95 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c > @@ -0,0 +1,23 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -fdump-tree-ldist-details" } */ > + > +int x[1000]; > + > +void foo (int n) > +{ > + int i; > + > + for (i = 0; i < n; ++i) > + { > + x[2*i] = 0; > + x[2*i + 1] = 1; > + } > +} > + > +/* We should not apply loop distribution as it is not beneficial from > + a data locality point of view. Also it is not possible to generate > + a memset (0) as the write has a stride of 2. */ > + > +/* { dg-final { scan-tree-dump-not "distributed: split to" "ldist" } } */ > +/* { dg-final { scan-tree-dump-not "__builtin_memset" "ldist" } } */ > +/* { dg-final { cleanup-tree-dump "ldist" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c > new file mode 100644 > index 0000000..61e8e56 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c > @@ -0,0 +1,21 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -fdump-tree-ldist-details" } */ > + > +int x[1000]; > + > +void foo (int n) > +{ > + int i; > + > + for (i = 0; i < n; ++i) > + { > + x[i] = 0; > + x[2*i + 1] = 1; > + } > +} > + > +/* We should apply loop distribution and generate a memset (0). */ > + > +/* { dg-final { scan-tree-dump "distributed: split to 2" "ldist" } } */ > +/* { dg-final { scan-tree-dump-times "__builtin_memset" 2 "ldist" } } */ > +/* { dg-final { cleanup-tree-dump "ldist" } } */ > diff --git a/gcc/testsuite/gfortran.dg/ldist-pr45199.f b/gcc/testsuite/gfortran.dg/ldist-pr45199.f > new file mode 100644 > index 0000000..2fab43a > --- /dev/null > +++ b/gcc/testsuite/gfortran.dg/ldist-pr45199.f > @@ -0,0 +1,27 @@ > +! { dg-do compile } > +! { dg-options "-O3 -fdump-tree-ldist-details" } > + > + parameter(numlev=3,numoblev=1000) > + integer i_otyp(numoblev,numlev), i_styp(numoblev,numlev) > + logical l_numob(numoblev,numlev) > + do ixe=1,numoblev > + do iye=1,numlev > + i_otyp(ixe,iye)=0 > + i_styp(ixe,iye)=0 > + l_numob(ixe,iye)=.false. > + enddo > + enddo > + do i=1,m > + do j=1,n > + if (l_numob(i,j)) then > + write(20,'(7I4,F12.2,4F16.10)') i_otyp(i,j),i_styp(i,j) > + endif > + enddo > + enddo > + end > + > +! GCC should apply memset zero loop distribution and it should not ICE. > + > +! { dg-final { scan-tree-dump "distributed: split to 9 loops" "ldist" } } > +! { dg-final { scan-tree-dump-times "__builtin_memset" 18 "ldist" } } > +! { dg-final { cleanup-tree-dump "ldist" } } > diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c > index 3cee320..094d168 100644 > --- a/gcc/tree-data-ref.c > +++ b/gcc/tree-data-ref.c > @@ -4974,6 +4974,27 @@ stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts) > free (bbs); > } > > +/* Returns true when STMT is an assignment that contains a data > + reference on its LHS with a stride of the same size as its unit > + type. */ > + > +static bool > +mem_write_stride_of_same_size_as_unit_type_p (gimple stmt) > +{ > + struct data_reference *dr = XCNEW (struct data_reference); > + tree op0 = gimple_assign_lhs (stmt); > + bool res; > + > + DR_STMT (dr) = stmt; > + DR_REF (dr) = op0; > + > + res = dr_analyze_innermost (dr) > + && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)); > + > + free_data_ref (dr); > + return res; > +} > + > /* Initialize STMTS with all the statements of LOOP that contain a > store to memory of the form "A[i] = 0". */ > > @@ -4994,7 +5015,8 @@ stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts) > && is_gimple_assign (stmt) > && gimple_assign_rhs_code (stmt) == INTEGER_CST > && (op = gimple_assign_rhs1 (stmt)) > - && (integer_zerop (op) || real_zerop (op))) > + && (integer_zerop (op) || real_zerop (op)) > + && mem_write_stride_of_same_size_as_unit_type_p (stmt)) > VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si)); > > free (bbs); > diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h > index 844a2ae..d929f31 100644 > --- a/gcc/tree-data-ref.h > +++ b/gcc/tree-data-ref.h > @@ -603,6 +603,17 @@ void remove_similar_memory_refs (VEC (gimple, heap) **); > bool rdg_defs_used_in_other_loops_p (struct graph *, int); > bool have_similar_memory_accesses (gimple, gimple); > > +/* Returns true when STRIDE is equal in absolute value to the size of > + the unit type of TYPE. */ > + > +static inline bool > +stride_of_unit_type_p (tree stride, tree type) > +{ > + return tree_int_cst_equal (fold_unary (ABS_EXPR, TREE_TYPE (stride), > + stride), > + TYPE_SIZE_UNIT (type)); > +} > + > /* Determines whether RDG vertices V1 and V2 access to similar memory > locations, in which case they have to be in the same partition. */ > > diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c > index 007c4f3..357f51f 100644 > --- a/gcc/tree-loop-distribution.c > +++ b/gcc/tree-loop-distribution.c > @@ -258,42 +258,27 @@ generate_memset_zero (gimple stmt, tree op0, tree nb_iter, > if (!dr_analyze_innermost (dr)) > goto end; > > - /* Test for a positive stride, iterating over every element. */ > - if (integer_zerop (size_binop (MINUS_EXPR, > - fold_convert (sizetype, DR_STEP (dr)), > - TYPE_SIZE_UNIT (TREE_TYPE (op0))))) > - { > - addr_base = fold_convert_loc (loc, sizetype, > - size_binop_loc (loc, PLUS_EXPR, > - DR_OFFSET (dr), > - DR_INIT (dr))); > - addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR, > - TREE_TYPE (DR_BASE_ADDRESS (dr)), > - DR_BASE_ADDRESS (dr), addr_base); > - > - nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); > - } > + if (!stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0))) > + goto end; > + > + nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); > + addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr)); > + addr_base = fold_convert_loc (loc, sizetype, addr_base); > > /* Test for a negative stride, iterating over every element. */ > - else if (integer_zerop (size_binop (PLUS_EXPR, > - TYPE_SIZE_UNIT (TREE_TYPE (op0)), > - fold_convert (sizetype, DR_STEP (dr))))) > + if (integer_zerop (size_binop (PLUS_EXPR, > + TYPE_SIZE_UNIT (TREE_TYPE (op0)), > + fold_convert (sizetype, DR_STEP (dr))))) > { > - nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); > - > - addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr)); > - addr_base = fold_convert_loc (loc, sizetype, addr_base); > addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base, > fold_convert_loc (loc, sizetype, nb_bytes)); > addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base, > TYPE_SIZE_UNIT (TREE_TYPE (op0))); > - addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR, > - TREE_TYPE (DR_BASE_ADDRESS (dr)), > - DR_BASE_ADDRESS (dr), addr_base); > } > - else > - goto end; > > + addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR, > + TREE_TYPE (DR_BASE_ADDRESS (dr)), > + DR_BASE_ADDRESS (dr), addr_base); > mem = force_gimple_operand (addr_base, &stmts, true, NULL); > gimple_seq_add_seq (&stmt_list, stmts); > > @@ -781,8 +766,9 @@ build_rdg_partition_for_component (struct graph *rdg, rdgc c, > and determine those vertices that have some memory affinity with > the current nodes in the component: these are stores to the same > arrays, i.e. we're taking care of cache locality. */ > - rdg_flag_similar_memory_accesses (rdg, partition, loops, processed, > - other_stores); > + if (!flag_tree_loop_distribute_patterns) > + rdg_flag_similar_memory_accesses (rdg, partition, loops, processed, > + other_stores); > > rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes); > > -- > 1.7.1 > >
On Wed, 1 Dec 2010, Sebastian Pop wrote: > On Wed, Dec 1, 2010 at 11:15, Sebastian Pop <sebpop@gmail.com> wrote: > > Hi, > > > > here is the updated patch that addresses the review comments. > > Ok for trunk after bootstrap and test? > > This patch passed bootstrap and test on amd64-linux. Ok. Thanks, Richard. > Sebastian > > > > > Thanks, > > Sebastian > > > > 2010-11-30 Sebastian Pop <sebastian.pop@amd.com> > > > > PR tree-optimization/45199 > > * tree-data-ref.c (mem_write_stride_of_same_size_as_unit_type_p): New. > > (stores_zero_from_loop): Call > > mem_write_stride_of_same_size_as_unit_type_p. > > * tree-data-ref.h (stride_of_unit_type_p): New. > > * tree-loop-distribution.c (generate_memset_zero): Simplified. > > Call stride_of_unit_type_p. > > (build_rdg_partition_for_component): Do not call > > rdg_flag_similar_memory_accesses when > > flag_tree_loop_distribute_patterns is set. > > > > * gcc.dg/tree-ssa/ldist-15.c: New. > > * gcc.dg/tree-ssa/ldist-16.c: New. > > * gfortran.dg/ldist-pr45199.f: New. > > --- > > gcc/ChangeLog | 13 ++++++++ > > gcc/testsuite/ChangeLog | 7 ++++ > > gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c | 23 +++++++++++++++ > > gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c | 21 ++++++++++++++ > > gcc/testsuite/gfortran.dg/ldist-pr45199.f | 27 +++++++++++++++++ > > gcc/tree-data-ref.c | 24 +++++++++++++++- > > gcc/tree-data-ref.h | 11 +++++++ > > gcc/tree-loop-distribution.c | 44 ++++++++++------------------- > > 8 files changed, 140 insertions(+), 30 deletions(-) > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c > > create mode 100644 gcc/testsuite/gfortran.dg/ldist-pr45199.f > > > > diff --git a/gcc/ChangeLog b/gcc/ChangeLog > > index 5706ef8..2881d58 100644 > > --- a/gcc/ChangeLog > > +++ b/gcc/ChangeLog > > @@ -1,3 +1,16 @@ > > +2010-11-30 Sebastian Pop <sebastian.pop@amd.com> > > + > > + PR tree-optimization/45199 > > + * tree-data-ref.c (mem_write_stride_of_same_size_as_unit_type_p): New. > > + (stores_zero_from_loop): Call > > + mem_write_stride_of_same_size_as_unit_type_p. > > + * tree-data-ref.h (stride_of_unit_type_p): New. > > + * tree-loop-distribution.c (generate_memset_zero): Simplified. > > + Call stride_of_unit_type_p. > > + (build_rdg_partition_for_component): Do not call > > + rdg_flag_similar_memory_accesses when > > + flag_tree_loop_distribute_patterns is set. > > + > > 2010-11-29 H.J. Lu <hongjiu.lu@intel.com> > > > > PR driver/46712 > > diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog > > index 5792e66..20aaf04 100644 > > --- a/gcc/testsuite/ChangeLog > > +++ b/gcc/testsuite/ChangeLog > > @@ -1,3 +1,10 @@ > > +2010-11-30 Sebastian Pop <sebastian.pop@amd.com> > > + > > + PR tree-optimization/45199 > > + * gcc.dg/tree-ssa/ldist-15.c: New. > > + * gcc.dg/tree-ssa/ldist-16.c: New. > > + * gfortran.dg/ldist-pr45199.f: New. > > + > > 2010-11-29 Nicola Pero <nicola.pero@meta-innovation.com> > > > > * objc.dg/duplicate-class-1.m: New. > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c > > new file mode 100644 > > index 0000000..7ce3b95 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c > > @@ -0,0 +1,23 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O3 -fdump-tree-ldist-details" } */ > > + > > +int x[1000]; > > + > > +void foo (int n) > > +{ > > + int i; > > + > > + for (i = 0; i < n; ++i) > > + { > > + x[2*i] = 0; > > + x[2*i + 1] = 1; > > + } > > +} > > + > > +/* We should not apply loop distribution as it is not beneficial from > > + a data locality point of view. Also it is not possible to generate > > + a memset (0) as the write has a stride of 2. */ > > + > > +/* { dg-final { scan-tree-dump-not "distributed: split to" "ldist" } } */ > > +/* { dg-final { scan-tree-dump-not "__builtin_memset" "ldist" } } */ > > +/* { dg-final { cleanup-tree-dump "ldist" } } */ > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c > > new file mode 100644 > > index 0000000..61e8e56 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c > > @@ -0,0 +1,21 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O3 -fdump-tree-ldist-details" } */ > > + > > +int x[1000]; > > + > > +void foo (int n) > > +{ > > + int i; > > + > > + for (i = 0; i < n; ++i) > > + { > > + x[i] = 0; > > + x[2*i + 1] = 1; > > + } > > +} > > + > > +/* We should apply loop distribution and generate a memset (0). */ > > + > > +/* { dg-final { scan-tree-dump "distributed: split to 2" "ldist" } } */ > > +/* { dg-final { scan-tree-dump-times "__builtin_memset" 2 "ldist" } } */ > > +/* { dg-final { cleanup-tree-dump "ldist" } } */ > > diff --git a/gcc/testsuite/gfortran.dg/ldist-pr45199.f b/gcc/testsuite/gfortran.dg/ldist-pr45199.f > > new file mode 100644 > > index 0000000..2fab43a > > --- /dev/null > > +++ b/gcc/testsuite/gfortran.dg/ldist-pr45199.f > > @@ -0,0 +1,27 @@ > > +! { dg-do compile } > > +! { dg-options "-O3 -fdump-tree-ldist-details" } > > + > > + parameter(numlev=3,numoblev=1000) > > + integer i_otyp(numoblev,numlev), i_styp(numoblev,numlev) > > + logical l_numob(numoblev,numlev) > > + do ixe=1,numoblev > > + do iye=1,numlev > > + i_otyp(ixe,iye)=0 > > + i_styp(ixe,iye)=0 > > + l_numob(ixe,iye)=.false. > > + enddo > > + enddo > > + do i=1,m > > + do j=1,n > > + if (l_numob(i,j)) then > > + write(20,'(7I4,F12.2,4F16.10)') i_otyp(i,j),i_styp(i,j) > > + endif > > + enddo > > + enddo > > + end > > + > > +! GCC should apply memset zero loop distribution and it should not ICE. > > + > > +! { dg-final { scan-tree-dump "distributed: split to 9 loops" "ldist" } } > > +! { dg-final { scan-tree-dump-times "__builtin_memset" 18 "ldist" } } > > +! { dg-final { cleanup-tree-dump "ldist" } } > > diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c > > index 3cee320..094d168 100644 > > --- a/gcc/tree-data-ref.c > > +++ b/gcc/tree-data-ref.c > > @@ -4974,6 +4974,27 @@ stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts) > > free (bbs); > > } > > > > +/* Returns true when STMT is an assignment that contains a data > > + reference on its LHS with a stride of the same size as its unit > > + type. */ > > + > > +static bool > > +mem_write_stride_of_same_size_as_unit_type_p (gimple stmt) > > +{ > > + struct data_reference *dr = XCNEW (struct data_reference); > > + tree op0 = gimple_assign_lhs (stmt); > > + bool res; > > + > > + DR_STMT (dr) = stmt; > > + DR_REF (dr) = op0; > > + > > + res = dr_analyze_innermost (dr) > > + && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)); > > + > > + free_data_ref (dr); > > + return res; > > +} > > + > > /* Initialize STMTS with all the statements of LOOP that contain a > > store to memory of the form "A[i] = 0". */ > > > > @@ -4994,7 +5015,8 @@ stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts) > > && is_gimple_assign (stmt) > > && gimple_assign_rhs_code (stmt) == INTEGER_CST > > && (op = gimple_assign_rhs1 (stmt)) > > - && (integer_zerop (op) || real_zerop (op))) > > + && (integer_zerop (op) || real_zerop (op)) > > + && mem_write_stride_of_same_size_as_unit_type_p (stmt)) > > VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si)); > > > > free (bbs); > > diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h > > index 844a2ae..d929f31 100644 > > --- a/gcc/tree-data-ref.h > > +++ b/gcc/tree-data-ref.h > > @@ -603,6 +603,17 @@ void remove_similar_memory_refs (VEC (gimple, heap) **); > > bool rdg_defs_used_in_other_loops_p (struct graph *, int); > > bool have_similar_memory_accesses (gimple, gimple); > > > > +/* Returns true when STRIDE is equal in absolute value to the size of > > + the unit type of TYPE. */ > > + > > +static inline bool > > +stride_of_unit_type_p (tree stride, tree type) > > +{ > > + return tree_int_cst_equal (fold_unary (ABS_EXPR, TREE_TYPE (stride), > > + stride), > > + TYPE_SIZE_UNIT (type)); > > +} > > + > > /* Determines whether RDG vertices V1 and V2 access to similar memory > > locations, in which case they have to be in the same partition. */ > > > > diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c > > index 007c4f3..357f51f 100644 > > --- a/gcc/tree-loop-distribution.c > > +++ b/gcc/tree-loop-distribution.c > > @@ -258,42 +258,27 @@ generate_memset_zero (gimple stmt, tree op0, tree nb_iter, > > if (!dr_analyze_innermost (dr)) > > goto end; > > > > - /* Test for a positive stride, iterating over every element. */ > > - if (integer_zerop (size_binop (MINUS_EXPR, > > - fold_convert (sizetype, DR_STEP (dr)), > > - TYPE_SIZE_UNIT (TREE_TYPE (op0))))) > > - { > > - addr_base = fold_convert_loc (loc, sizetype, > > - size_binop_loc (loc, PLUS_EXPR, > > - DR_OFFSET (dr), > > - DR_INIT (dr))); > > - addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR, > > - TREE_TYPE (DR_BASE_ADDRESS (dr)), > > - DR_BASE_ADDRESS (dr), addr_base); > > - > > - nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); > > - } > > + if (!stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0))) > > + goto end; > > + > > + nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); > > + addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr)); > > + addr_base = fold_convert_loc (loc, sizetype, addr_base); > > > > /* Test for a negative stride, iterating over every element. */ > > - else if (integer_zerop (size_binop (PLUS_EXPR, > > - TYPE_SIZE_UNIT (TREE_TYPE (op0)), > > - fold_convert (sizetype, DR_STEP (dr))))) > > + if (integer_zerop (size_binop (PLUS_EXPR, > > + TYPE_SIZE_UNIT (TREE_TYPE (op0)), > > + fold_convert (sizetype, DR_STEP (dr))))) > > { > > - nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); > > - > > - addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr)); > > - addr_base = fold_convert_loc (loc, sizetype, addr_base); > > addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base, > > fold_convert_loc (loc, sizetype, nb_bytes)); > > addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base, > > TYPE_SIZE_UNIT (TREE_TYPE (op0))); > > - addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR, > > - TREE_TYPE (DR_BASE_ADDRESS (dr)), > > - DR_BASE_ADDRESS (dr), addr_base); > > } > > - else > > - goto end; > > > > + addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR, > > + TREE_TYPE (DR_BASE_ADDRESS (dr)), > > + DR_BASE_ADDRESS (dr), addr_base); > > mem = force_gimple_operand (addr_base, &stmts, true, NULL); > > gimple_seq_add_seq (&stmt_list, stmts); > > > > @@ -781,8 +766,9 @@ build_rdg_partition_for_component (struct graph *rdg, rdgc c, > > and determine those vertices that have some memory affinity with > > the current nodes in the component: these are stores to the same > > arrays, i.e. we're taking care of cache locality. */ > > - rdg_flag_similar_memory_accesses (rdg, partition, loops, processed, > > - other_stores); > > + if (!flag_tree_loop_distribute_patterns) > > + rdg_flag_similar_memory_accesses (rdg, partition, loops, processed, > > + other_stores); > > > > rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes); > > > > -- > > 1.7.1 > > > > > >
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5706ef8..b73aeae 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2010-11-30 Sebastian Pop <sebastian.pop@amd.com> + + PR tree-optimization/45199 + * tree-data-ref.c (dataref_stride_has_unit_type_stride): New. + (stores_zero_from_loop): Call dataref_stride_has_unit_type_stride. + * tree-loop-distribution.c (build_rdg_partition_for_component): Do + not call rdg_flag_similar_memory_accesses when + flag_tree_loop_distribute_patterns is set. + 2010-11-29 H.J. Lu <hongjiu.lu@intel.com> PR driver/46712 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 5792e66..20aaf04 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2010-11-30 Sebastian Pop <sebastian.pop@amd.com> + + PR tree-optimization/45199 + * gcc.dg/tree-ssa/ldist-15.c: New. + * gcc.dg/tree-ssa/ldist-16.c: New. + * gfortran.dg/ldist-pr45199.f: New. + 2010-11-29 Nicola Pero <nicola.pero@meta-innovation.com> * objc.dg/duplicate-class-1.m: New. diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c new file mode 100644 index 0000000..7ce3b95 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-15.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-ldist-details" } */ + +int x[1000]; + +void foo (int n) +{ + int i; + + for (i = 0; i < n; ++i) + { + x[2*i] = 0; + x[2*i + 1] = 1; + } +} + +/* We should not apply loop distribution as it is not beneficial from + a data locality point of view. Also it is not possible to generate + a memset (0) as the write has a stride of 2. */ + +/* { dg-final { scan-tree-dump-not "distributed: split to" "ldist" } } */ +/* { dg-final { scan-tree-dump-not "__builtin_memset" "ldist" } } */ +/* { dg-final { cleanup-tree-dump "ldist" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c new file mode 100644 index 0000000..61e8e56 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-ldist-details" } */ + +int x[1000]; + +void foo (int n) +{ + int i; + + for (i = 0; i < n; ++i) + { + x[i] = 0; + x[2*i + 1] = 1; + } +} + +/* We should apply loop distribution and generate a memset (0). */ + +/* { dg-final { scan-tree-dump "distributed: split to 2" "ldist" } } */ +/* { dg-final { scan-tree-dump-times "__builtin_memset" 2 "ldist" } } */ +/* { dg-final { cleanup-tree-dump "ldist" } } */ diff --git a/gcc/testsuite/gfortran.dg/ldist-pr45199.f b/gcc/testsuite/gfortran.dg/ldist-pr45199.f new file mode 100644 index 0000000..2fab43a --- /dev/null +++ b/gcc/testsuite/gfortran.dg/ldist-pr45199.f @@ -0,0 +1,27 @@ +! { dg-do compile } +! { dg-options "-O3 -fdump-tree-ldist-details" } + + parameter(numlev=3,numoblev=1000) + integer i_otyp(numoblev,numlev), i_styp(numoblev,numlev) + logical l_numob(numoblev,numlev) + do ixe=1,numoblev + do iye=1,numlev + i_otyp(ixe,iye)=0 + i_styp(ixe,iye)=0 + l_numob(ixe,iye)=.false. + enddo + enddo + do i=1,m + do j=1,n + if (l_numob(i,j)) then + write(20,'(7I4,F12.2,4F16.10)') i_otyp(i,j),i_styp(i,j) + endif + enddo + enddo + end + +! GCC should apply memset zero loop distribution and it should not ICE. + +! { dg-final { scan-tree-dump "distributed: split to 9 loops" "ldist" } } +! { dg-final { scan-tree-dump-times "__builtin_memset" 18 "ldist" } } +! { dg-final { cleanup-tree-dump "ldist" } } diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index 3cee320..17f34de 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -4974,6 +4974,30 @@ stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts) free (bbs); } +/* Returns true when STMT is an assignment that contains a data + reference on its LHS with a stride of the same size as its unit + type. This is the same test as in generate_memset_zero. */ + +static bool +dataref_stride_has_unit_type_stride (gimple stmt) +{ + struct data_reference *dr = XCNEW (struct data_reference); + tree op0 = gimple_assign_lhs (stmt); + + DR_STMT (dr) = stmt; + DR_REF (dr) = op0; + if (!dr_analyze_innermost (dr)) + return false; + + return dr_analyze_innermost (dr) && + (integer_zerop (size_binop (MINUS_EXPR, + fold_convert (sizetype, DR_STEP (dr)), + TYPE_SIZE_UNIT (TREE_TYPE (op0)))) + || integer_zerop (size_binop (PLUS_EXPR, + TYPE_SIZE_UNIT (TREE_TYPE (op0)), + fold_convert (sizetype, DR_STEP (dr))))); You leak dr. && goes to the next line. You want tree_int_cst_equal (fold_unary (ABS_EXPR, TREE_TYPE (DR_STEP (dr)), DR_STEP (dr)), TYPE_SIZE_UNIT (...)) if this is the same test as in generate_memset_zero please put this helper in tree-loop-distribution and use it there as well. +} + /* Initialize STMTS with all the statements of LOOP that contain a store to memory of the form "A[i] = 0". */ @@ -4994,7 +5018,8 @@ stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts) && is_gimple_assign (stmt) && gimple_assign_rhs_code (stmt) == INTEGER_CST && (op = gimple_assign_rhs1 (stmt)) - && (integer_zerop (op) || real_zerop (op))) + && (integer_zerop (op) || real_zerop (op)) + && dataref_stride_has_unit_type_stride (stmt)) VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si)); free (bbs); diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c index 007c4f3..2c2af2c 100644 --- a/gcc/tree-loop-distribution.c +++ b/gcc/tree-loop-distribution.c @@ -781,8 +781,9 @@ build_rdg_partition_for_component (struct graph *rdg, rdgc c, and determine those vertices that have some memory affinity with the current nodes in the component: these are stores to the same arrays, i.e. we're taking care of cache locality. */ - rdg_flag_similar_memory_accesses (rdg, partition, loops, processed, - other_stores); + if (!flag_tree_loop_distribute_patterns) + rdg_flag_similar_memory_accesses (rdg, partition, loops, processed, + other_stores); rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes);