Message ID | 560D2B09.4020501@mentor.com |
---|---|
State | New |
Headers | show |
On Thu, Oct 01, 2015 at 02:46:01PM +0200, Tom de Vries wrote: > this patch adds initialization in zero_iter_bb of counters introduced in > expand_omp_for_init_counts. > > This removes the need to set TREE_NO_WARNING on those counters. Why do you think it is a good idea? I'd be afraid it slows things down unnecessarily. Furthermore, I'd prefer not to change this area of code before gomp-4_1-branch is merged, as it will be a nightmare for the merge otherwise. Jakub
On 01/10/15 14:49, Jakub Jelinek wrote: > On Thu, Oct 01, 2015 at 02:46:01PM +0200, Tom de Vries wrote: >> this patch adds initialization in zero_iter_bb of counters introduced in >> expand_omp_for_init_counts. >> >> This removes the need to set TREE_NO_WARNING on those counters. > > Why do you think it is a good idea? In replace_ssa_name, I've recently added the assert: ... gcc_assert (!SSA_NAME_IS_DEFAULT_DEF (name)); ... On the gomp-4_0-branch, this assert triggers for a collapsed acc loop, which uses expand_omp_for_generic for omp-expansion. The assert triggers because (some of) the counters added by expand_omp_for_init_counts are not initialized on all paths. On trunk, for the test-case in the patch, this assert doesn't trigger because the omp function is split off before ssa. > I'd be afraid it slows things down unnecessarily. I think zero_iter_bb is a block that is expected not to be executed frequently. I've attached an sdiff of x86_64 assembly for the test-case (before left, after right). AFAICT, this patch has the effect that it speeds up the frequent path with one instruction. > Furthermore, I'd prefer not to change this area of code before > gomp-4_1-branch is merged, as it will be a nightmare for the merge > otherwise. Committing to gomp-4_0-branch for now would work for me. Thanks, - Tom .file "collapse.c" .file "collapse.c" .text .text .p2align 4,,15 .p2align 4,,15 .type foo._omp_fn.0, @funct .type foo._omp_fn.0, @funct foo._omp_fn.0: foo._omp_fn.0: .LFB12: .LFB12: .cfi_startproc .cfi_startproc pushq %rbp pushq %rbp .cfi_def_cfa_offset 16 .cfi_def_cfa_offset 16 .cfi_offset 6, -16 .cfi_offset 6, -16 pushq %rbx pushq %rbx .cfi_def_cfa_offset 24 .cfi_def_cfa_offset 24 .cfi_offset 3, -24 .cfi_offset 3, -24 xorl %esi, %esi < subq $24, %rsp subq $24, %rsp .cfi_def_cfa_offset 48 .cfi_def_cfa_offset 48 movl (%rdi), %eax movl (%rdi), %eax movl 4(%rdi), %ebp movl 4(%rdi), %ebp testl %eax, %eax testl %eax, %eax jle .L8 | jle .L9 testl %ebp, %ebp testl %ebp, %ebp jle .L8 | jle .L9 movslq %ebp, %rbx movslq %ebp, %rbx movslq %eax, %rsi movslq %eax, %rsi imulq %rbx, %rsi imulq %rbx, %rsi .L8: .L8: leaq 8(%rsp), %r8 leaq 8(%rsp), %r8 xorl %edi, %edi xorl %edi, %edi movq %rsp, %rcx movq %rsp, %rcx movl $1, %edx movl $1, %edx call GOMP_loop_runtime_sta call GOMP_loop_runtime_sta testb %al, %al testb %al, %al jne .L10 jne .L10 .L6: .L6: call GOMP_loop_end_nowait call GOMP_loop_end_nowait addq $24, %rsp addq $24, %rsp .cfi_remember_state .cfi_remember_state .cfi_def_cfa_offset 24 .cfi_def_cfa_offset 24 popq %rbx popq %rbx .cfi_def_cfa_offset 16 .cfi_def_cfa_offset 16 popq %rbp popq %rbp .cfi_def_cfa_offset 8 .cfi_def_cfa_offset 8 ret ret .p2align 4,,10 .p2align 4,,10 .p2align 3 .p2align 3 .L15: .L15: .cfi_restore_state .cfi_restore_state leaq 8(%rsp), %rsi leaq 8(%rsp), %rsi movq %rsp, %rdi movq %rsp, %rdi call GOMP_loop_runtime_nex call GOMP_loop_runtime_nex testb %al, %al testb %al, %al je .L6 je .L6 .L10: .L10: movq (%rsp), %rsi movq (%rsp), %rsi movq 8(%rsp), %r9 movq 8(%rsp), %r9 movq %rsi, %rax movq %rsi, %rax cqto cqto idivq %rbx idivq %rbx movslq %eax, %r8 movslq %eax, %r8 .p2align 4,,10 .p2align 4,,10 .p2align 3 .p2align 3 .L4: .L4: leaq (%r8,%r8,4), %rdi leaq (%r8,%r8,4), %rdi movslq %edx, %rcx movslq %edx, %rcx addq $1, %rsi addq $1, %rsi cmpq %rsi, %r9 cmpq %rsi, %r9 leaq (%rdi,%rdi,4), %rdi leaq (%rdi,%rdi,4), %rdi leaq (%rcx,%rdi,4), %rcx leaq (%rcx,%rdi,4), %rcx movl $1, a(,%rcx,4) movl $1, a(,%rcx,4) jle .L15 jle .L15 addl $1, %edx addl $1, %edx cmpl %edx, %ebp cmpl %edx, %ebp jg .L4 jg .L4 addl $1, %eax addl $1, %eax xorl %edx, %edx xorl %edx, %edx movslq %eax, %r8 movslq %eax, %r8 jmp .L4 jmp .L4 > .L9: > xorl %ebx, %ebx > xorl %esi, %esi > jmp .L8 .cfi_endproc .cfi_endproc .LFE12: .LFE12: .size foo._omp_fn.0, .-foo. .size foo._omp_fn.0, .-foo. .p2align 4,,15 .p2align 4,,15 .globl foo .globl foo .type foo, @function .type foo, @function foo: foo: .LFB10: .LFB10: .cfi_startproc .cfi_startproc subq $24, %rsp subq $24, %rsp .cfi_def_cfa_offset 32 .cfi_def_cfa_offset 32 xorl %ecx, %ecx xorl %ecx, %ecx xorl %edx, %edx xorl %edx, %edx movl %edi, (%rsp) movl %edi, (%rsp) movl %esi, 4(%rsp) movl %esi, 4(%rsp) movl $foo._omp_fn.0, %edi movl $foo._omp_fn.0, %edi movq %rsp, %rsi movq %rsp, %rsi call GOMP_parallel call GOMP_parallel addq $24, %rsp addq $24, %rsp .cfi_def_cfa_offset 8 .cfi_def_cfa_offset 8 ret ret .cfi_endproc .cfi_endproc .LFE10: .LFE10: .size foo, .-foo .size foo, .-foo
[ was: Re: [patch] Add counter inits to zero_iter_bb in expand_omp_for_init_counts ] On 01/10/15 15:37, Tom de Vries wrote: > On 01/10/15 14:49, Jakub Jelinek wrote: >> On Thu, Oct 01, 2015 at 02:46:01PM +0200, Tom de Vries wrote: >>> this patch adds initialization in zero_iter_bb of counters introduced in >>> expand_omp_for_init_counts. >>> >>> This removes the need to set TREE_NO_WARNING on those counters. >> >> Why do you think it is a good idea? > > In replace_ssa_name, I've recently added the assert: > ... > gcc_assert (!SSA_NAME_IS_DEFAULT_DEF (name)); > ... > > On the gomp-4_0-branch, this assert triggers for a collapsed acc loop, > which uses expand_omp_for_generic for omp-expansion. The assert > triggers because (some of) the counters added by > expand_omp_for_init_counts are not initialized on all paths. > > On trunk, for the test-case in the patch, this assert doesn't trigger > because the omp function is split off before ssa. > >> I'd be afraid it slows things down unnecessarily. > > I think zero_iter_bb is a block that is expected not to be executed > frequently. > > I've attached an sdiff of x86_64 assembly for the test-case (before > left, after right). AFAICT, this patch has the effect that it speeds up > the frequent path with one instruction. > >> Furthermore, I'd prefer not to change this area of code before >> gomp-4_1-branch is merged, as it will be a nightmare for the merge >> otherwise. > > Committing to gomp-4_0-branch for now would work for me. > Committed to gomp-4_0-branch. Thanks, - Tom
Hi Jakub and Tom! On Thu, 1 Oct 2015 15:37:26 +0200, Tom de Vries <Tom_deVries@mentor.com> wrote: > On 01/10/15 14:49, Jakub Jelinek wrote: > > On Thu, Oct 01, 2015 at 02:46:01PM +0200, Tom de Vries wrote: > >> this patch adds initialization in zero_iter_bb of counters introduced in > >> expand_omp_for_init_counts. > >> > >> This removes the need to set TREE_NO_WARNING on those counters. > > > > Why do you think it is a good idea? > > [...] > > Furthermore, I'd prefer not to change this area of code before > > gomp-4_1-branch is merged, as it will be a nightmare for the merge > > otherwise. > > Committing to gomp-4_0-branch for now would work for me. Well, the "nightmare" to merge this thus fell onto me... In particular, as part of my gomp-4_0-branch r229255, <http://news.gmane.org/find-root.php?message_id=%3C877fmd7lig.fsf%40schwinge.name%3E>, I "butchered" your (Tom's) code (gomp-4_0-branch r228595) to work in context of Jakub's changes -- would you please check that out (current gomp-4_0-branch)? Even though there are no testsuite regressions, given my lack of detailed understanding of this code, I'm not too sure about my changes. Here's, briefly, what I did: in gcc/omp-low.c:expand_omp_for_init_counts extend created_zero_iter_bb handling for fd->ordered; at the end of that function, individually decide (lame...) whether to use zero_iter1_bb or zero_iter2_bb; in gcc/omp-low.c:expand_omp_for_generic remove TREE_NO_WARNING code for the new zero_iter2_bb case, too. Grüße, Thomas
On 23/10/15 16:27, Thomas Schwinge wrote: > Hi Jakub and Tom! > > On Thu, 1 Oct 2015 15:37:26 +0200, Tom de Vries <Tom_deVries@mentor.com> wrote: >> On 01/10/15 14:49, Jakub Jelinek wrote: >>> On Thu, Oct 01, 2015 at 02:46:01PM +0200, Tom de Vries wrote: >>>> this patch adds initialization in zero_iter_bb of counters introduced in >>>> expand_omp_for_init_counts. >>>> >>>> This removes the need to set TREE_NO_WARNING on those counters. >>> >>> Why do you think it is a good idea? >> >> [...] > >>> Furthermore, I'd prefer not to change this area of code before >>> gomp-4_1-branch is merged, as it will be a nightmare for the merge >>> otherwise. >> >> Committing to gomp-4_0-branch for now would work for me. > > Well, the "nightmare" to merge this thus fell onto me... In particular, > as part of my gomp-4_0-branch r229255, > <http://news.gmane.org/find-root.php?message_id=%3C877fmd7lig.fsf%40schwinge.name%3E>, > I "butchered" your (Tom's) code (gomp-4_0-branch r228595) to work in > context of Jakub's changes -- would you please check that out (current > gomp-4_0-branch)? Even though there are no testsuite regressions, given > my lack of detailed understanding of this code, I'm not too sure about my > changes. > > Here's, briefly, what I did: in gcc/omp-low.c:expand_omp_for_init_counts > extend created_zero_iter_bb handling for fd->ordered; at the end of that > function, individually decide (lame...) whether to use zero_iter1_bb or > zero_iter2_bb; in gcc/omp-low.c:expand_omp_for_generic remove > TREE_NO_WARNING code for the new zero_iter2_bb case, too. Hi Thomas, the merge looks good to me. Thanks, - Tom
Add counter inits to zero_iter_bb in expand_omp_for_init_counts 2015-10-01 Tom de Vries <tom@codesourcery.com> * omp-low.c (expand_omp_for_init_counts): Add inits for counters in zero_iter_bb. (expand_omp_for_generic): Remove TREE_NO_WARNING setttings on counters. * gcc.dg/gomp/collapse-2.c: New test. --- gcc/omp-low.c | 26 +++++++++++++++++++------- gcc/testsuite/gcc.dg/gomp/collapse-2.c | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/gomp/collapse-2.c diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 8bcad08..8181757 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -5732,6 +5732,7 @@ expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi, return; } + bool created_zero_iter_bb = false; for (i = 0; i < fd->collapse; i++) { tree itype = TREE_TYPE (fd->loops[i].v); @@ -5774,6 +5775,7 @@ expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi, gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT); set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb, entry_bb); + created_zero_iter_bb = true; } ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE); ne->probability = REG_BR_PROB_BASE / 2000 - 1; @@ -5826,6 +5828,23 @@ expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi, expand_omp_build_assign (gsi, fd->loop.n2, t); } } + + if (created_zero_iter_bb) + { + gimple_stmt_iterator gsi = gsi_after_labels (zero_iter_bb); + /* Atm counts[0] doesn't seem to be used beyond create_zero_iter_bb, + but for robustness-sake we include that one as well. */ + for (i = 0; i < fd->collapse; i++) + { + tree var = counts[i]; + if (!SSA_VAR_P (var)) + continue; + + tree zero = build_zero_cst (type); + gassign *assign_stmt = gimple_build_assign (var, zero); + gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); + } + } } @@ -6116,7 +6135,6 @@ expand_omp_for_generic (struct omp_region *region, bool broken_loop = region->cont == NULL; edge e, ne; tree *counts = NULL; - int i; gcc_assert (!broken_loop || !in_combined_parallel); gcc_assert (fd->iter_type == long_integer_type_node @@ -6185,12 +6203,6 @@ expand_omp_for_generic (struct omp_region *region, if (zero_iter_bb) { - /* Some counts[i] vars might be uninitialized if - some loop has zero iterations. But the body shouldn't - be executed in that case, so just avoid uninit warnings. */ - for (i = first_zero_iter; i < fd->collapse; i++) - if (SSA_VAR_P (counts[i])) - TREE_NO_WARNING (counts[i]) = 1; gsi_prev (&gsi); e = split_block (entry_bb, gsi_stmt (gsi)); entry_bb = e->dest; diff --git a/gcc/testsuite/gcc.dg/gomp/collapse-2.c b/gcc/testsuite/gcc.dg/gomp/collapse-2.c new file mode 100644 index 0000000..5319f89 --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/collapse-2.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fopenmp -fdump-tree-ssa" } */ + +#define N 100 + +int a[N][N]; + +void +foo (int m, int n) +{ + int i, j; +#pragma omp parallel +#pragma omp for collapse(2) schedule (runtime) + for (i = 0; i < m; i++) + for (j = 0; j < n; j++) + a[i][j] = 1; +} + +/* { dg-final { scan-tree-dump-not "(?n)PHI.*count.*\\(D\\)" "ssa" } } */ -- 1.9.1