diff mbox series

Fix modulo-scheduler -fcompare-debug issues

Message ID b9ec83f2-d11f-b74f-1e95-1bef27616281@ispras.ru
State New
Headers show
Series Fix modulo-scheduler -fcompare-debug issues | expand

Commit Message

Roman Zhuykov March 10, 2020, 4:39 p.m. UTC
Hi!

Current modulo-sched implementation is a bit faulty from -fcompile-debug perspective.

I found that few years ago, the most simple example is pr42631.c which fails (with just -fmodulo-sched or together with -fmodulo-sched-allow-regmoves) on powerpc64le with at least any gcc-4.9 or newer compiler.
I've investigated that difference about 3 years ago, it is mostly technical, dumps shows there are some "flying" NOTE_INSN_DELETED items.
I understood that it is minor, and I planned to commit the fix only when my other modulo-sched stuff will be ready.

But right now I see that when I enable -fmodulo-sched by default, powerpc64le bootstrap give comparison failure as of r10-7056.

Comparing stages 2 and 3
Bootstrap comparison failure!
gcc/ggc-page.o differs

That doesn't happen on released branches, so it is a kind of "regression" (certainly, nobody runs bootstrap with -fmodulo-sched).

Is that a good reason to commit the patch right now in stage4?

Patch was successfully regstrapped (based on r10-7056) using x86_64 and powerpc64le, both with default options and with -fmodulo-sched enabled.

Roman

--
modulo-sched: fix compare-debug issues
    
This patch fixes bootstrap comparison failure on powerpc64le when running it
with -fmodulo-sched enabled.
    
When applying the schedule we have to move debug insns in the same
way as we move note insns.  Also we have to discard adding debug insns
to SCCs in DDG graph.
    
20YY-MM-DD  Roman Zhuykov  <zhroma@ispras.ru>
   
	* ddg.c (create_ddg_dep_from_intra_loop_link): Adjust assertions.
	(create_ddg_dep_no_link): Likewise.
	(add_inter_loop_mem_dep): Do not create "debug --> non-debug" anti-deps.
	(create_ddg): Adjust first_note field filling.
	(check_sccs): Assert if any debug instruction is in SCC.
	* modulo-sched.c (ps_first_note): Add an assertion if first_note
	is empty.
    
testsuite:
    
20YY-MM-DD  Roman Zhuykov  <zhroma@ispras.ru>
    
	* gcc.dg/pr42631-sms1.c: New test.
	* gcc.dg/pr42631-sms2.c: New test.

Comments

Richard Biener March 11, 2020, 8:14 a.m. UTC | #1
On Tue, 10 Mar 2020, Roman Zhuykov wrote:

> Hi!
> 
> Current modulo-sched implementation is a bit faulty from -fcompile-debug perspective.
> 
> I found that few years ago, the most simple example is pr42631.c which fails (with just -fmodulo-sched or together with -fmodulo-sched-allow-regmoves) on powerpc64le with at least any gcc-4.9 or newer compiler.
> I've investigated that difference about 3 years ago, it is mostly technical, dumps shows there are some "flying" NOTE_INSN_DELETED items.
> I understood that it is minor, and I planned to commit the fix only when my other modulo-sched stuff will be ready.
> 
> But right now I see that when I enable -fmodulo-sched by default, powerpc64le bootstrap give comparison failure as of r10-7056.
> 
> Comparing stages 2 and 3
> Bootstrap comparison failure!
> gcc/ggc-page.o differs
> 
> That doesn't happen on released branches, so it is a kind of "regression" (certainly, nobody runs bootstrap with -fmodulo-sched).
> 
> Is that a good reason to commit the patch right now in stage4?

Yes from a RM perspective, no comments about the technical details of
the patch.

Richard.

> Patch was successfully regstrapped (based on r10-7056) using x86_64 and powerpc64le, both with default options and with -fmodulo-sched enabled.
> 
> Roman
> 
> --
> modulo-sched: fix compare-debug issues
>     
> This patch fixes bootstrap comparison failure on powerpc64le when running it
> with -fmodulo-sched enabled.
>     
> When applying the schedule we have to move debug insns in the same
> way as we move note insns.  Also we have to discard adding debug insns
> to SCCs in DDG graph.
>     
> 20YY-MM-DD  Roman Zhuykov  <zhroma@ispras.ru>
>    
> 	* ddg.c (create_ddg_dep_from_intra_loop_link): Adjust assertions.
> 	(create_ddg_dep_no_link): Likewise.
> 	(add_inter_loop_mem_dep): Do not create "debug --> non-debug" anti-deps.
> 	(create_ddg): Adjust first_note field filling.
> 	(check_sccs): Assert if any debug instruction is in SCC.
> 	* modulo-sched.c (ps_first_note): Add an assertion if first_note
> 	is empty.
>     
> testsuite:
>     
> 20YY-MM-DD  Roman Zhuykov  <zhroma@ispras.ru>
>     
> 	* gcc.dg/pr42631-sms1.c: New test.
> 	* gcc.dg/pr42631-sms2.c: New test.
> 
> diff --git a/gcc/ddg.c b/gcc/ddg.c
> index ca8cb74823..048207a354 100644
> --- a/gcc/ddg.c
> +++ b/gcc/ddg.c
> @@ -185,8 +185,8 @@ create_ddg_dep_from_intra_loop_link (ddg_ptr g, ddg_node_ptr src_node,
>    else if (DEP_TYPE (link) == REG_DEP_OUTPUT)
>      t = OUTPUT_DEP;
>  
> -  gcc_assert (!DEBUG_INSN_P (dest_node->insn) || t == ANTI_DEP);
> -  gcc_assert (!DEBUG_INSN_P (src_node->insn) || t == ANTI_DEP);
> +  gcc_assert (NONDEBUG_INSN_P (dest_node->insn) || t == ANTI_DEP);
> +  gcc_assert (NONDEBUG_INSN_P (src_node->insn) || DEBUG_INSN_P (dest_node->insn));
>  
>    /* We currently choose not to create certain anti-deps edges and
>       compensate for that by generating reg-moves based on the life-range
> @@ -222,9 +222,9 @@ create_ddg_dep_from_intra_loop_link (ddg_ptr g, ddg_node_ptr src_node,
>          }
>      }
>  
> -   latency = dep_cost (link);
> -   e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
> -   add_edge_to_ddg (g, e);
> +  latency = dep_cost (link);
> +  e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
> +  add_edge_to_ddg (g, e);
>  }
>  
>  /* The same as the above function, but it doesn't require a link parameter.  */
> @@ -237,8 +237,8 @@ create_ddg_dep_no_link (ddg_ptr g, ddg_node_ptr from, ddg_node_ptr to,
>    enum reg_note dep_kind;
>    struct _dep _dep, *dep = &_dep;
>  
> -  gcc_assert (!DEBUG_INSN_P (to->insn) || d_t == ANTI_DEP);
> -  gcc_assert (!DEBUG_INSN_P (from->insn) || d_t == ANTI_DEP);
> +  gcc_assert (NONDEBUG_INSN_P (to->insn) || d_t == ANTI_DEP);
> +  gcc_assert (NONDEBUG_INSN_P (from->insn) || DEBUG_INSN_P (to->insn));
>  
>    if (d_t == ANTI_DEP)
>      dep_kind = REG_DEP_ANTI;
> @@ -455,8 +455,12 @@ add_inter_loop_mem_dep (ddg_ptr g, ddg_node_ptr from, ddg_node_ptr to)
>  	return;
>        else if (from->cuid != to->cuid)
>  	{
> -	  create_ddg_dep_no_link (g, from, to, ANTI_DEP, MEM_DEP, 1);
> -	  if (DEBUG_INSN_P (from->insn) || DEBUG_INSN_P (to->insn))
> +	  gcc_assert (NONDEBUG_INSN_P (to->insn));
> +
> +	  if (NONDEBUG_INSN_P (from->insn))
> +	    create_ddg_dep_no_link (g, from, to, ANTI_DEP, MEM_DEP, 1);
> +
> +	  if (DEBUG_INSN_P (from->insn))
>  	    create_ddg_dep_no_link (g, to, from, ANTI_DEP, MEM_DEP, 1);
>  	  else
>  	    create_ddg_dep_no_link (g, to, from, TRUE_DEP, MEM_DEP, 1);
> @@ -607,28 +611,34 @@ create_ddg (basic_block bb, int closing_branch_deps)
>        if (! INSN_P (insn))
>  	{
>  	  if (! first_note && NOTE_P (insn)
> -	      && NOTE_KIND (insn) !=  NOTE_INSN_BASIC_BLOCK)
> +	      && NOTE_KIND (insn) != NOTE_INSN_BASIC_BLOCK)
>  	    first_note = insn;
>  	  continue;
>  	}
> +
>        if (JUMP_P (insn))
>  	{
>  	  gcc_assert (!g->closing_branch);
>  	  g->closing_branch = &g->nodes[i];
>  	}
> -      else if (GET_CODE (PATTERN (insn)) == USE)
> -	{
> -	  if (! first_note)
> -	    first_note = insn;
> -	  continue;
> -	}
> +
> +      if (! first_note)
> +	first_note = insn;
> +
> +      if (GET_CODE (PATTERN (insn)) == USE)
> +	continue;
>  
>        g->nodes[i].cuid = i;
>        g->nodes[i].successors = sbitmap_alloc (num_nodes);
>        bitmap_clear (g->nodes[i].successors);
>        g->nodes[i].predecessors = sbitmap_alloc (num_nodes);
>        bitmap_clear (g->nodes[i].predecessors);
> -      g->nodes[i].first_note = (first_note ? first_note : insn);
> +
> +      if (! DEBUG_INSN_P (insn))
> +	{
> +	  g->nodes[i].first_note = first_note;
> +	  first_note = NULL;
> +	}
>  
>        g->nodes[i].aux.count = -1;
>        g->nodes[i].max_dist = XCNEWVEC (int, num_nodes);
> @@ -636,7 +646,6 @@ create_ddg (basic_block bb, int closing_branch_deps)
>  	g->nodes[i].max_dist[j] = -1;
>  
>        g->nodes[i++].insn = insn;
> -      first_note = NULL;
>      }
>  
>    /* We must have found a branch in DDG.  */
> @@ -1002,13 +1011,19 @@ order_sccs (ddg_all_sccs_ptr g)
>  static void
>  check_sccs (ddg_all_sccs_ptr sccs, int num_nodes)
>  {
> -  int i = 0;
> +  int i;
> +  unsigned int j;
>    auto_sbitmap tmp (num_nodes);
>  
>    bitmap_clear (tmp);
>    for (i = 0; i < sccs->num_sccs; i++)
>      {
> +      sbitmap_iterator sbi;
>        gcc_assert (!bitmap_empty_p (sccs->sccs[i]->nodes));
> +      /* Debug insns should not be in found SCCs.  */
> +      EXECUTE_IF_SET_IN_BITMAP (sccs->sccs[i]->nodes, 0, j, sbi)
> +	gcc_assert (NONDEBUG_INSN_P (sccs->ddg->nodes[j].insn));
> +
>        /* Verify that every node in sccs is in exactly one strongly
>           connected component.  */
>        gcc_assert (!bitmap_intersect_p (tmp, sccs->sccs[i]->nodes));
> diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c
> index e16e023243..3f6aa8c7c2 100644
> --- a/gcc/modulo-sched.c
> +++ b/gcc/modulo-sched.c
> @@ -318,6 +318,7 @@ static rtx_insn *
>  ps_first_note (partial_schedule_ptr ps, int id)
>  {
>    gcc_assert (id < ps->g->num_nodes);
> +  gcc_assert (ps->g->nodes[id].first_note);
>    return ps->g->nodes[id].first_note;
>  }
>  
> diff --git a/gcc/testsuite/gcc.dg/pr42631-sms1.c b/gcc/testsuite/gcc.dg/pr42631-sms1.c
> new file mode 100644
> index 0000000000..0117276c73
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr42631-sms1.c
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-options "-g -O1 -funroll-loops -fmodulo-sched -fcompare-debug" } */
> +/* { dg-xfail-if "" { powerpc-ibm-aix* } } */
> +
> +void foo()
> +{
> +  unsigned k;
> +  while (--k > 0);
> +}
> diff --git a/gcc/testsuite/gcc.dg/pr42631-sms2.c b/gcc/testsuite/gcc.dg/pr42631-sms2.c
> new file mode 100644
> index 0000000000..2555da4522
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr42631-sms2.c
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-options "-g -O1 -funroll-loops -fmodulo-sched -fmodulo-sched-allow-regmoves -fcompare-debug" } */
> +/* { dg-xfail-if "" { powerpc-ibm-aix* } } */
> +
> +void foo()
> +{
> +  unsigned k;
> +  while (--k > 0);
> +}
>
Li, Pan2 via Gcc-patches March 12, 2020, 3:17 a.m. UTC | #2
On Tue, 2020-03-10 at 19:39 +0300, Roman Zhuykov wrote:
> Hi!
> 
> Current modulo-sched implementation is a bit faulty from -fcompile-debug
> perspective.
> 
> I found that few years ago, the most simple example is pr42631.c which fails
> (with just -fmodulo-sched or together with -fmodulo-sched-allow-regmoves) on
> powerpc64le with at least any gcc-4.9 or newer compiler.
> I've investigated that difference about 3 years ago, it is mostly technical,
> dumps shows there are some "flying" NOTE_INSN_DELETED items.
> I understood that it is minor, and I planned to commit the fix only when my
> other modulo-sched stuff will be ready.
> 
> But right now I see that when I enable -fmodulo-sched by default, powerpc64le
> bootstrap give comparison failure as of r10-7056.
> 
> Comparing stages 2 and 3
> Bootstrap comparison failure!
> gcc/ggc-page.o differs
> 
> That doesn't happen on released branches, so it is a kind of "regression"
> (certainly, nobody runs bootstrap with -fmodulo-sched).
> 
> Is that a good reason to commit the patch right now in stage4?
> 
> Patch was successfully regstrapped (based on r10-7056) using x86_64 and
> powerpc64le, both with default options and with -fmodulo-sched enabled.
> 
> Roman
> 
> --
> modulo-sched: fix compare-debug issues
>     
> This patch fixes bootstrap comparison failure on powerpc64le when running it
> with -fmodulo-sched enabled.
>     
> When applying the schedule we have to move debug insns in the same
> way as we move note insns.  Also we have to discard adding debug insns
> to SCCs in DDG graph.
>     
> 20YY-MM-DD  Roman Zhuykov  <zhroma@ispras.ru>
>    
> 	* ddg.c (create_ddg_dep_from_intra_loop_link): Adjust assertions.
> 	(create_ddg_dep_no_link): Likewise.
> 	(add_inter_loop_mem_dep): Do not create "debug --> non-debug" anti-deps.
> 	(create_ddg): Adjust first_note field filling.
> 	(check_sccs): Assert if any debug instruction is in SCC.
> 	* modulo-sched.c (ps_first_note): Add an assertion if first_note
> 	is empty.
>     
> testsuite:
>     
> 20YY-MM-DD  Roman Zhuykov  <zhroma@ispras.ru>
>     
> 	* gcc.dg/pr42631-sms1.c: New test.
> 	* gcc.dg/pr42631-sms2.c: New test.
Even though we don't have a BZ, I think a bootstrap failure, even one with
modulo-scheduling enabled is severe enough that we should try to fix it.

OK for the trunk.

jeff
>
Li, Pan2 via Gcc-patches March 20, 2020, 10:55 a.m. UTC | #3
Hi all!

12.03.2020 6:17, Jeff Law wrote:
>> Current modulo-sched implementation is a bit faulty from -fcompile-debug
>> perspective.
>>
>> But right now I see that when I enable -fmodulo-sched by default, powerpc64le
>> bootstrap give comparison failure as of r10-7056.
>>
>> Comparing stages 2 and 3
>> Bootstrap comparison failure!
>> gcc/ggc-page.o differs
>>
>> That doesn't happen on released branches, so it is a kind of "regression"
>> (certainly, nobody runs bootstrap with -fmodulo-sched).
> Even though we don't have a BZ, I think a bootstrap failure, even one with
> modulo-scheduling enabled is severe enough that we should try to fix it.
> 
> OK for the trunk.
> 
> jeff

11.03.2020 11:14, Richard Biener wrote:
> 
> Yes from a RM perspective, no comments about the technical details of
> the patch.
> 
> Richard.
> 

Haven't committed that patch yet but made some additional investigation.

(0) Running tests with -fcompare-debug (without -fmodulo-sched) shows a lot of different failures, I've created few PRs already and they were addressed (Thanks to Jakub!).

There are three kinds of issues.

(0a) Technical ones, I've not reported them.  Many checking techniques are not ready for the compiler to run twice for one driver invocation. E.g.

FAIL: c-c++-common/diagnostic-format-json-2.c  -Wc++-compat  (test for excess errors)

fails because second cc1 run prints additional '[]' to the output.

All tests in gcc.dg/pch directory have a similar issue.

(0b) When only dumps differ, but not the code, e.g. PR94223 and PR94167.

(0c) When it is a real "mistake", like PR94166 or PR94211.  Or -w difference like in PR94189.


Now, speaking about SMS, enabling it by default catches three (again!) kinds of separate issues with debug instructions.  And two first are observable for many years, while the third one is a bit more tricky and I caught it only in 2019.  My previous patch targeted (2) and (3) together, and now I have to split it.

(1) The first issue is that reordering doesn't touch debug instructions at all.  When we have found a proper schedule permute_partial_schedule function is called.  It constructs new order in reverse, first we move the instruction that should be prev_insn (jump) to its place, than the previous one, etc.
It doesn't consider moving debug instructions, so all of them are crowded before any non-debug instruction.  This certainly doesn't help much for debug information and on ppc64le causes

FAIL: gcc.dg/guality/loop-1.c   -O2  -DPREVENT_OPTIMIZATION  line 20 i == 1

and few other failures.  I never actually tried to fix those, maybe the best solution here is to drop all debug inside the loop when SMS succeeded.  But in this case we will also lose something - at least for now we have correct debug info when the loop was finished.


(2) The second issue is a "simple" -fcompare-debug failure, which actually can not lead to different code in .text (and can not cause bootstrap failure).  As I mentioned in previous mail it can be observed in pr42631.c example for ~10 years.  My previous patch and small attached patch2.txt (can be applied only after patch3.txt) solves this.  Note instructions "stick" to nearest following instruction and they are moved all together.  But when one compares -g0 and -g, the note can instead stick to a debug instruction, and according to (1) it will eventually "move up" together with that debug instruction instead of "sinking" with the following non-debug instruction.  An obvious solution is to extend the "sticking" approach to debug instructions themselves.  I've used that solution ("previous" patch) locally for ~3 years.  This change also affects the approach described in (1) but unfortunately doesn't fix any testcase and make something even worse:

FAIL: gcc.dg/guality/pr90716.c   -O1  -DPREVENT_OPTIMIZATION  line 23 j + 1 == 9

So, IMHO we should not apply this in stage4.

(3) And the last one we have to fix now if we bother about -fmodulo-sched ppc64le bootstrap.  Failing examples are "gcc -fcompare-debug -O2 -fmodulo-sched --param sms-min-sc=1 gcc/testsuite/gcc.dg/torture/pr87197.c" on ppc64le and "gcc -fcompare-debug -O2 gcc.c-torture/execute/pr70127.c" on aarch64.  Contrary to scheduler itself, when building DDG graph we consider all debug instructions and their dependencies.  This may sometimes lead to a different result in sms_order_nodes function, as it depends on SCCs in DDG.  My previous ad-hoc solution was just to remove any "debug->non-debug" edges in DDG.  Now I reimplemented that by fully removing debug instructions nodes.  I've tested patch3.txt a lot last week in different scenarios and will commit it in a week if no objection.


Roman
Author: Roman Zhuykov <zhroma@ispras.ru>
Date:   Thu Mar 12 19:36:22 2020 +0300

    modulo-sched: fix compare-debug issue with flying note insns
    
    This patch fixes -fcompare-debug failures in testsuite when it runs
    with -fmodulo-sched enabled.
    
    The approach is to move debug insns in the same way as note insns.
    Without the patch note insn which sticks to debug insn causes comparison
    failure, because it will stick to another insn when -g0 is used.
    
    20YY-MM-DD  Roman Zhuykov  <zhroma@ispras.ru>
    
    	* ddg.c (create_ddg): Adjust first_note field filling.
    
    testsuite:
    
    20YY-MM-DD  Roman Zhuykov  <zhroma@ispras.ru>
    
    	* gcc.dg/pr42631-sms1.c: New test.
    	* gcc.dg/pr42631-sms2.c: New test.

diff --git a/gcc/ddg.c b/gcc/ddg.c
--- a/gcc/ddg.c
+++ b/gcc/ddg.c
@@ -592,8 +592,8 @@ create_ddg (basic_block bb, int closing_branch_deps)
 	    g->nodes[i].max_dist[j] = -1;
 
 	  g->nodes[i++].insn = insn;
+	  first_note = NULL;
 	}
-      first_note = NULL;
     }
 
   /* We must have found a branch in DDG.  */
diff --git a/gcc/testsuite/gcc.dg/pr42631-sms1.c b/gcc/testsuite/gcc.dg/pr42631-sms1.c
new file mode 100644
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr42631-sms1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-g -O1 -funroll-loops -fmodulo-sched -fcompare-debug" } */
+/* { dg-xfail-if "" { powerpc-ibm-aix* } } */
+
+void foo()
+{
+  unsigned k;
+  while (--k > 0);
+}
diff --git a/gcc/testsuite/gcc.dg/pr42631-sms2.c b/gcc/testsuite/gcc.dg/pr42631-sms2.c
new file mode 100644
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr42631-sms2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-g -O1 -funroll-loops -fmodulo-sched -fmodulo-sched-allow-regmoves -fcompare-debug" } */
+/* { dg-xfail-if "" { powerpc-ibm-aix* } } */
+
+void foo()
+{
+  unsigned k;
+  while (--k > 0);
+}
Author: Roman Zhuykov <zhroma@ispras.ru>
Date:   Fri Mar 6 10:57:50 2020 +0300

    modulo-sched: fix bootstrap compare-debug issue
    
    This patch removes all debug insns from DDG analysis.  It fixes bootstrap
    comparison failure on powerpc64le when running with -fmodulo-sched enabled.
    
    20YY-MM-DD  Roman Zhuykov  <zhroma@ispras.ru>
    
    	* ddg.c (create_ddg_dep_from_intra_loop_link): Remove assertions.
    	(create_ddg_dep_no_link): Likewise.
    	(add_cross_iteration_register_deps): Move debug instruction check.
    	Other minor refactoring.
    	(add_intra_loop_mem_dep): Do not check for debug instructions.
    	(add_inter_loop_mem_dep): Likewise.
    	(build_intra_loop_deps): Likewise.
    	(create_ddg): Do not include debug insns into the graph.
    	* ddg.h (struct ddg): Remove num_debug field.
    	* modulo-sched.c (doloop_register_get): Adjust condition.
    	(res_MII): Remove DDG num_debug field usage.
    	(sms_schedule_by_order): Use assertion against debug insns.
    	(ps_has_conflicts): Drop debug insn check.
    
    testsuite:
    
    20YY-MM-DD  Roman Zhuykov  <zhroma@ispras.ru>
    
    	* gcc.dg/torture/pr87197-debug-sms.c: New test.

diff --git a/gcc/ddg.c b/gcc/ddg.c
--- a/gcc/ddg.c
+++ b/gcc/ddg.c
@@ -185,9 +185,6 @@ create_ddg_dep_from_intra_loop_link (ddg_ptr g, ddg_node_ptr src_node,
   else if (DEP_TYPE (link) == REG_DEP_OUTPUT)
     t = OUTPUT_DEP;
 
-  gcc_assert (!DEBUG_INSN_P (dest_node->insn) || t == ANTI_DEP);
-  gcc_assert (!DEBUG_INSN_P (src_node->insn) || t == ANTI_DEP);
-
   /* We currently choose not to create certain anti-deps edges and
      compensate for that by generating reg-moves based on the life-range
      analysis.  The anti-deps that will be deleted are the ones which
@@ -222,9 +219,9 @@ create_ddg_dep_from_intra_loop_link (ddg_ptr g, ddg_node_ptr src_node,
         }
     }
 
-   latency = dep_cost (link);
-   e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
-   add_edge_to_ddg (g, e);
+  latency = dep_cost (link);
+  e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
+  add_edge_to_ddg (g, e);
 }
 
 /* The same as the above function, but it doesn't require a link parameter.  */
@@ -237,9 +234,6 @@ create_ddg_dep_no_link (ddg_ptr g, ddg_node_ptr from, ddg_node_ptr to,
   enum reg_note dep_kind;
   struct _dep _dep, *dep = &_dep;
 
-  gcc_assert (!DEBUG_INSN_P (to->insn) || d_t == ANTI_DEP);
-  gcc_assert (!DEBUG_INSN_P (from->insn) || d_t == ANTI_DEP);
-
   if (d_t == ANTI_DEP)
     dep_kind = REG_DEP_ANTI;
   else if (d_t == OUTPUT_DEP)
@@ -272,16 +266,15 @@ create_ddg_dep_no_link (ddg_ptr g, ddg_node_ptr from, ddg_node_ptr to,
 static void
 add_cross_iteration_register_deps (ddg_ptr g, df_ref last_def)
 {
-  int regno = DF_REF_REGNO (last_def);
   struct df_link *r_use;
   int has_use_in_bb_p = false;
-  rtx_insn *def_insn = DF_REF_INSN (last_def);
-  ddg_node_ptr last_def_node = get_node_of_insn (g, def_insn);
-  ddg_node_ptr use_node;
+  int regno = DF_REF_REGNO (last_def);
+  ddg_node_ptr last_def_node = get_node_of_insn (g, DF_REF_INSN (last_def));
   df_ref first_def = df_bb_regno_first_def_find (g->bb, regno);
+  ddg_node_ptr first_def_node = get_node_of_insn (g, DF_REF_INSN (first_def));
+  ddg_node_ptr use_node;
 
-  gcc_assert (last_def_node);
-  gcc_assert (first_def);
+  gcc_assert (last_def_node && first_def && first_def_node);
 
   if (flag_checking && DF_REF_ID (last_def) != DF_REF_ID (first_def))
     {
@@ -300,6 +293,9 @@ add_cross_iteration_register_deps (ddg_ptr g, df_ref last_def)
 
       rtx_insn *use_insn = DF_REF_INSN (r_use->ref);
 
+      if (DEBUG_INSN_P (use_insn))
+	continue;
+
       /* ??? Do not handle uses with DF_REF_IN_NOTE notes.  */
       use_node = get_node_of_insn (g, use_insn);
       gcc_assert (use_node);
@@ -310,35 +306,28 @@ add_cross_iteration_register_deps (ddg_ptr g, df_ref last_def)
 	     iteration.  Any such upwards exposed use appears before
 	     the last_def def.  */
 	  create_ddg_dep_no_link (g, last_def_node, use_node,
-				  DEBUG_INSN_P (use_insn) ? ANTI_DEP : TRUE_DEP,
-				  REG_DEP, 1);
+				  TRUE_DEP, REG_DEP, 1);
 	}
-      else if (!DEBUG_INSN_P (use_insn))
+      else
 	{
 	  /* Add anti deps from last_def's uses in the current iteration
 	     to the first def in the next iteration.  We do not add ANTI
 	     dep when there is an intra-loop TRUE dep in the opposite
 	     direction, but use regmoves to fix such disregarded ANTI
 	     deps when broken.	If the first_def reaches the USE then
-	     there is such a dep.  */
-	  ddg_node_ptr first_def_node = get_node_of_insn (g,
-							  DF_REF_INSN (first_def));
-
-	  gcc_assert (first_def_node);
-
-         /* Always create the edge if the use node is a branch in
-            order to prevent the creation of reg-moves.  
-            If the address that is being auto-inc or auto-dec in LAST_DEF
-            is used in USE_INSN then do not remove the edge to make sure
-            reg-moves will not be created for that address.  */
-          if (DF_REF_ID (last_def) != DF_REF_ID (first_def)
-              || !flag_modulo_sched_allow_regmoves
+	     there is such a dep.
+	     Always create the edge if the use node is a branch in
+	     order to prevent the creation of reg-moves.
+	     If the address that is being auto-inc or auto-dec in LAST_DEF
+	     is used in USE_INSN then do not remove the edge to make sure
+	     reg-moves will not be created for that address.  */
+	  if (DF_REF_ID (last_def) != DF_REF_ID (first_def)
+	      || !flag_modulo_sched_allow_regmoves
 	      || JUMP_P (use_node->insn)
-              || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn)
+	      || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn)
 	      || def_has_ccmode_p (DF_REF_INSN (last_def)))
-            create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
-                                    REG_DEP, 1);
-
+	    create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
+				    REG_DEP, 1);
 	}
     }
   /* Create an inter-loop output dependence between LAST_DEF (which is the
@@ -348,19 +337,11 @@ add_cross_iteration_register_deps (ddg_ptr g, df_ref last_def)
      defs starting with a true dependence to a use which can be in the
      next iteration; followed by an anti dependence of that use to the
      first def (i.e. if there is a use between the two defs.)  */
-  if (!has_use_in_bb_p)
-    {
-      ddg_node_ptr dest_node;
-
-      if (DF_REF_ID (last_def) == DF_REF_ID (first_def))
-	return;
-
-      dest_node = get_node_of_insn (g, DF_REF_INSN (first_def));
-      gcc_assert (dest_node);
-      create_ddg_dep_no_link (g, last_def_node, dest_node,
-			      OUTPUT_DEP, REG_DEP, 1);
-    }
+  if (!has_use_in_bb_p && DF_REF_ID (last_def) != DF_REF_ID (first_def))
+    create_ddg_dep_no_link (g, last_def_node, first_def_node,
+			    OUTPUT_DEP, REG_DEP, 1);
 }
+
 /* Build inter-loop dependencies, by looking at DF analysis backwards.  */
 static void
 build_inter_loop_deps (ddg_ptr g)
@@ -417,13 +398,9 @@ add_intra_loop_mem_dep (ddg_ptr g, ddg_node_ptr from, ddg_node_ptr to)
   if (mem_write_insn_p (from->insn))
     {
       if (mem_read_insn_p (to->insn))
-	create_ddg_dep_no_link (g, from, to,
-				DEBUG_INSN_P (to->insn)
-				? ANTI_DEP : TRUE_DEP, MEM_DEP, 0);
+	create_ddg_dep_no_link (g, from, to, TRUE_DEP, MEM_DEP, 0);
       else
-	create_ddg_dep_no_link (g, from, to,
-				DEBUG_INSN_P (to->insn)
-				? ANTI_DEP : OUTPUT_DEP, MEM_DEP, 0);
+	create_ddg_dep_no_link (g, from, to, OUTPUT_DEP, MEM_DEP, 0);
     }
   else if (!mem_read_insn_p (to->insn))
     create_ddg_dep_no_link (g, from, to, ANTI_DEP, MEM_DEP, 0);
@@ -441,13 +418,9 @@ add_inter_loop_mem_dep (ddg_ptr g, ddg_node_ptr from, ddg_node_ptr to)
   if (mem_write_insn_p (from->insn))
     {
       if (mem_read_insn_p (to->insn))
-  	create_ddg_dep_no_link (g, from, to,
-				DEBUG_INSN_P (to->insn)
-				? ANTI_DEP : TRUE_DEP, MEM_DEP, 1);
+	create_ddg_dep_no_link (g, from, to, TRUE_DEP, MEM_DEP, 1);
       else if (from->cuid != to->cuid)
-  	create_ddg_dep_no_link (g, from, to,
-				DEBUG_INSN_P (to->insn)
-				? ANTI_DEP : OUTPUT_DEP, MEM_DEP, 1);
+	create_ddg_dep_no_link (g, from, to, OUTPUT_DEP, MEM_DEP, 1);
     }
   else
     {
@@ -456,13 +429,9 @@ add_inter_loop_mem_dep (ddg_ptr g, ddg_node_ptr from, ddg_node_ptr to)
       else if (from->cuid != to->cuid)
 	{
 	  create_ddg_dep_no_link (g, from, to, ANTI_DEP, MEM_DEP, 1);
-	  if (DEBUG_INSN_P (from->insn) || DEBUG_INSN_P (to->insn))
-	    create_ddg_dep_no_link (g, to, from, ANTI_DEP, MEM_DEP, 1);
-	  else
-	    create_ddg_dep_no_link (g, to, from, TRUE_DEP, MEM_DEP, 1);
+	  create_ddg_dep_no_link (g, to, from, TRUE_DEP, MEM_DEP, 1);
 	}
     }
-
 }
 
 /* Perform intra-block Data Dependency analysis and connect the nodes in
@@ -491,20 +460,10 @@ build_intra_loop_deps (ddg_ptr g)
       sd_iterator_def sd_it;
       dep_t dep;
 
-      if (! INSN_P (dest_node->insn))
-	continue;
-
       FOR_EACH_DEP (dest_node->insn, SD_LIST_BACK, sd_it, dep)
 	{
 	  rtx_insn *src_insn = DEP_PRO (dep);
-	  ddg_node_ptr src_node;
-
-	  /* Don't add dependencies on debug insns to non-debug insns
-	     to avoid codegen differences between -g and -g0.  */
-	  if (DEBUG_INSN_P (src_insn) && !DEBUG_INSN_P (dest_node->insn))
-	    continue;
-
-	  src_node = get_node_of_insn (g, src_insn);
+	  ddg_node_ptr src_node = get_node_of_insn (g, src_insn);
 
 	  if (!src_node)
 	    continue;
@@ -521,8 +480,7 @@ build_intra_loop_deps (ddg_ptr g)
 	  for (j = 0; j <= i; j++)
 	    {
 	      ddg_node_ptr j_node = &g->nodes[j];
-	      if (DEBUG_INSN_P (j_node->insn))
-		continue;
+
 	      if (mem_access_insn_p (j_node->insn))
 		{
 		  /* Don't bother calculating inter-loop dep if an intra-loop dep
@@ -573,23 +531,21 @@ create_ddg (basic_block bb, int closing_branch_deps)
   for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
        insn = NEXT_INSN (insn))
     {
-      if (! INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE)
+      if (!INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE)
 	continue;
 
-      if (DEBUG_INSN_P (insn))
-	g->num_debug++;
-      else
+      if (NONDEBUG_INSN_P (insn))
 	{
 	  if (mem_read_insn_p (insn))
 	    g->num_loads++;
 	  if (mem_write_insn_p (insn))
 	    g->num_stores++;
+	  num_nodes++;
 	}
-      num_nodes++;
     }
 
   /* There is nothing to do for this BB.  */
-  if ((num_nodes - g->num_debug) <= 1)
+  if (num_nodes <= 1)
     {
       free (g);
       return NULL;
@@ -604,38 +560,39 @@ create_ddg (basic_block bb, int closing_branch_deps)
   for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
        insn = NEXT_INSN (insn))
     {
-      if (! INSN_P (insn))
-	{
-	  if (! first_note && NOTE_P (insn)
-	      && NOTE_KIND (insn) !=  NOTE_INSN_BASIC_BLOCK)
-	    first_note = insn;
-	  continue;
-	}
+      if (LABEL_P (insn) || NOTE_INSN_BASIC_BLOCK_P (insn))
+	continue;
+
+      if (!first_note && (INSN_P (insn) || NOTE_P (insn)))
+	first_note = insn;
+
+      if (!INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE)
+	continue;
+
       if (JUMP_P (insn))
 	{
 	  gcc_assert (!g->closing_branch);
 	  g->closing_branch = &g->nodes[i];
 	}
-      else if (GET_CODE (PATTERN (insn)) == USE)
+
+      if (NONDEBUG_INSN_P (insn))
 	{
-	  if (! first_note)
-	    first_note = insn;
-	  continue;
-	}
+	  g->nodes[i].cuid = i;
+	  g->nodes[i].successors = sbitmap_alloc (num_nodes);
+	  bitmap_clear (g->nodes[i].successors);
+	  g->nodes[i].predecessors = sbitmap_alloc (num_nodes);
+	  bitmap_clear (g->nodes[i].predecessors);
 
-      g->nodes[i].cuid = i;
-      g->nodes[i].successors = sbitmap_alloc (num_nodes);
-      bitmap_clear (g->nodes[i].successors);
-      g->nodes[i].predecessors = sbitmap_alloc (num_nodes);
-      bitmap_clear (g->nodes[i].predecessors);
-      g->nodes[i].first_note = (first_note ? first_note : insn);
+	  gcc_checking_assert (first_note);
+	  g->nodes[i].first_note = first_note;
 
-      g->nodes[i].aux.count = -1;
-      g->nodes[i].max_dist = XCNEWVEC (int, num_nodes);
-      for (j = 0; j < num_nodes; j++)
-	g->nodes[i].max_dist[j] = -1;
+	  g->nodes[i].aux.count = -1;
+	  g->nodes[i].max_dist = XCNEWVEC (int, num_nodes);
+	  for (j = 0; j < num_nodes; j++)
+	    g->nodes[i].max_dist[j] = -1;
 
-      g->nodes[i++].insn = insn;
+	  g->nodes[i++].insn = insn;
+	}
       first_note = NULL;
     }
 
diff --git a/gcc/ddg.h b/gcc/ddg.h
--- a/gcc/ddg.h
+++ b/gcc/ddg.h
@@ -116,9 +116,6 @@ struct ddg
   int num_loads;
   int num_stores;
 
-  /* Number of debug instructions in the BB.  */
-  int num_debug;
-
   /* This array holds the nodes in the graph; it is indexed by the node
      cuid, which follows the order of the instructions in the BB.  */
   ddg_node_ptr nodes;
diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c
--- a/gcc/modulo-sched.c
+++ b/gcc/modulo-sched.c
@@ -369,7 +369,7 @@ doloop_register_get (rtx_insn *head, rtx_insn *tail)
                              : prev_nondebug_insn (tail));
 
   for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
-    if (!DEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
+    if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
       {
         if (dump_file)
         {
@@ -428,7 +428,7 @@ res_MII (ddg_ptr g)
   if (targetm.sched.sms_res_mii)
     return targetm.sched.sms_res_mii (g);
 
-  return ((g->num_nodes - g->num_debug) / issue_rate);
+  return g->num_nodes / issue_rate;
 }
 
 
@@ -2152,11 +2152,7 @@ sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
   	  ddg_node_ptr u_node = &ps->g->nodes[u];
 	  rtx_insn *insn = u_node->insn;
 
-	  if (!NONDEBUG_INSN_P (insn))
-	    {
-	      bitmap_clear_bit (tobe_scheduled, u);
-	      continue;
-	    }
+	  gcc_checking_assert (NONDEBUG_INSN_P (insn));
 
 	  if (bitmap_bit_p (sched_nodes, u))
 	    continue;
@@ -3158,9 +3154,6 @@ ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
 	{
 	  rtx_insn *insn = ps_rtl_insn (ps, crr_insn->id);
 
-	  if (!NONDEBUG_INSN_P (insn))
-	    continue;
-
 	  /* Check if there is room for the current insn.  */
 	  if (!can_issue_more || state_dead_lock_p (curr_state))
 	    return true;
diff --git a/gcc/testsuite/gcc.dg/torture/pr87197-debug-sms.c b/gcc/testsuite/gcc.dg/torture/pr87197-debug-sms.c
new file mode 100644
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr87197-debug-sms.c
@@ -0,0 +1,4 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fcompare-debug -fmodulo-sched --param sms-min-sc=1" } */
+
+#include "pr87197.c"
Li, Pan2 via Gcc-patches March 27, 2020, 4:53 p.m. UTC | #4
Hi,


On Fri, 20 Mar 2020 at 11:56, Roman Zhuykov via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi all!
>
> 12.03.2020 6:17, Jeff Law wrote:
> >> Current modulo-sched implementation is a bit faulty from -fcompile-debug
> >> perspective.
> >>
> >> But right now I see that when I enable -fmodulo-sched by default, powerpc64le
> >> bootstrap give comparison failure as of r10-7056.
> >>
> >> Comparing stages 2 and 3
> >> Bootstrap comparison failure!
> >> gcc/ggc-page.o differs
> >>
> >> That doesn't happen on released branches, so it is a kind of "regression"
> >> (certainly, nobody runs bootstrap with -fmodulo-sched).
> > Even though we don't have a BZ, I think a bootstrap failure, even one with
> > modulo-scheduling enabled is severe enough that we should try to fix it.
> >
> > OK for the trunk.
> >
> > jeff
>
> 11.03.2020 11:14, Richard Biener wrote:
> >
> > Yes from a RM perspective, no comments about the technical details of
> > the patch.
> >
> > Richard.
> >
>
> Haven't committed that patch yet but made some additional investigation.
>
> (0) Running tests with -fcompare-debug (without -fmodulo-sched) shows a lot of different failures, I've created few PRs already and they were addressed (Thanks to Jakub!).
>
> There are three kinds of issues.
>
> (0a) Technical ones, I've not reported them.  Many checking techniques are not ready for the compiler to run twice for one driver invocation. E.g.
>
> FAIL: c-c++-common/diagnostic-format-json-2.c  -Wc++-compat  (test for excess errors)
>
> fails because second cc1 run prints additional '[]' to the output.
>
> All tests in gcc.dg/pch directory have a similar issue.
>
> (0b) When only dumps differ, but not the code, e.g. PR94223 and PR94167.
>
> (0c) When it is a real "mistake", like PR94166 or PR94211.  Or -w difference like in PR94189.
>
>
> Now, speaking about SMS, enabling it by default catches three (again!) kinds of separate issues with debug instructions.  And two first are observable for many years, while the third one is a bit more tricky and I caught it only in 2019.  My previous patch targeted (2) and (3) together, and now I have to split it.
>
> (1) The first issue is that reordering doesn't touch debug instructions at all.  When we have found a proper schedule permute_partial_schedule function is called.  It constructs new order in reverse, first we move the instruction that should be prev_insn (jump) to its place, than the previous one, etc.
> It doesn't consider moving debug instructions, so all of them are crowded before any non-debug instruction.  This certainly doesn't help much for debug information and on ppc64le causes
>
> FAIL: gcc.dg/guality/loop-1.c   -O2  -DPREVENT_OPTIMIZATION  line 20 i == 1
>
> and few other failures.  I never actually tried to fix those, maybe the best solution here is to drop all debug inside the loop when SMS succeeded.  But in this case we will also lose something - at least for now we have correct debug info when the loop was finished.
>
>
> (2) The second issue is a "simple" -fcompare-debug failure, which actually can not lead to different code in .text (and can not cause bootstrap failure).  As I mentioned in previous mail it can be observed in pr42631.c example for ~10 years.  My previous patch and small attached patch2.txt (can be applied only after patch3.txt) solves this.  Note instructions "stick" to nearest following instruction and they are moved all together.  But when one compares -g0 and -g, the note can instead stick to a debug instruction, and according to (1) it will eventually "move up" together with that debug instruction instead of "sinking" with the following non-debug instruction.  An obvious solution is to extend the "sticking" approach to debug instructions themselves.  I've used that solution ("previous" patch) locally for ~3 years.  This change also affects the approach described in (1) but unfortunately doesn't fix any testcase and make something even worse:
>
> FAIL: gcc.dg/guality/pr90716.c   -O1  -DPREVENT_OPTIMIZATION  line 23 j + 1 == 9
>
> So, IMHO we should not apply this in stage4.
>
> (3) And the last one we have to fix now if we bother about -fmodulo-sched ppc64le bootstrap.  Failing examples are "gcc -fcompare-debug -O2 -fmodulo-sched --param sms-min-sc=1 gcc/testsuite/gcc.dg/torture/pr87197.c" on ppc64le and "gcc -fcompare-debug -O2 gcc.c-torture/execute/pr70127.c" on aarch64.  Contrary to scheduler itself, when building DDG graph we consider all debug instructions and their dependencies.  This may sometimes lead to a different result in sms_order_nodes function, as it depends on SCCs in DDG.  My previous ad-hoc solution was just to remove any "debug->non-debug" edges in DDG.  Now I reimplemented that by fully removing debug instructions nodes.  I've tested patch3.txt a lot last week in different scenarios and will commit it in a week if no objection.
>
>

I've noticed failures on arm and aarch64:
FAIL: gcc.dg/torture/pr87197-debug-sms.c   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for
excess errors)
Excess errors:
xgcc: error: /gcc/testsuite/gcc.dg/torture/pr87197-debug-sms.c:
'-fcompare-debug' failure

And on some arm targets:
FAIL:  gcc.c-torture/execute/pr70127-debug-sms.c   -O3
-fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer
-finline-functions  (test for excess errors)
Excess errors:
xgcc: error: /gcc/testsuite/gcc.c-torture/execute/pr70127-debug-sms.c:
'-fcompare-debug' failure
(--target arm-none-linux-gnueabihf --with-cpu cortex-a57 --with-fpu
crypto-neon-fp-armv8)

Are they still expected?

Thanks,
Christophe

> Roman
>
>
>
>
Li, Pan2 via Gcc-patches March 28, 2020, 9:26 p.m. UTC | #5
Hi,

Christophe Lyon wrote 2020-03-27 19:53:
> Hi,
> 
> 
> On Fri, 20 Mar 2020 at 11:56, Roman Zhuykov via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
>> 
>> Hi all!
>> 
>> 12.03.2020 6:17, Jeff Law wrote:
>> >> Current modulo-sched implementation is a bit faulty from -fcompile-debug
>> >> perspective.
>> >>
>> >> But right now I see that when I enable -fmodulo-sched by default, powerpc64le
>> >> bootstrap give comparison failure as of r10-7056.
>> >>
>> >> Comparing stages 2 and 3
>> >> Bootstrap comparison failure!
>> >> gcc/ggc-page.o differs
>> >>
>> >> That doesn't happen on released branches, so it is a kind of "regression"
>> >> (certainly, nobody runs bootstrap with -fmodulo-sched).
>> > Even though we don't have a BZ, I think a bootstrap failure, even one with
>> > modulo-scheduling enabled is severe enough that we should try to fix it.
>> >
>> > OK for the trunk.
>> >
>> > jeff
>> 
>> 11.03.2020 11:14, Richard Biener wrote:
>> >
>> > Yes from a RM perspective, no comments about the technical details of
>> > the patch.
>> >
>> > Richard.
>> >
>> 
>> Haven't committed that patch yet but made some additional 
>> investigation.
>> 
>> (0) Running tests with -fcompare-debug (without -fmodulo-sched) shows 
>> a lot of different failures, I've created few PRs already and they 
>> were addressed (Thanks to Jakub!).
>> 
>> There are three kinds of issues.
>> 
>> (0a) Technical ones, I've not reported them.  Many checking techniques 
>> are not ready for the compiler to run twice for one driver invocation. 
>> E.g.
>> 
>> FAIL: c-c++-common/diagnostic-format-json-2.c  -Wc++-compat  (test for 
>> excess errors)
>> 
>> fails because second cc1 run prints additional '[]' to the output.
>> 
>> All tests in gcc.dg/pch directory have a similar issue.
>> 
>> (0b) When only dumps differ, but not the code, e.g. PR94223 and 
>> PR94167.
>> 
>> (0c) When it is a real "mistake", like PR94166 or PR94211.  Or -w 
>> difference like in PR94189.
>> 
>> 
>> Now, speaking about SMS, enabling it by default catches three (again!) 
>> kinds of separate issues with debug instructions.  And two first are 
>> observable for many years, while the third one is a bit more tricky 
>> and I caught it only in 2019.  My previous patch targeted (2) and (3) 
>> together, and now I have to split it.
>> 
>> (1) The first issue is that reordering doesn't touch debug 
>> instructions at all.  When we have found a proper schedule 
>> permute_partial_schedule function is called.  It constructs new order 
>> in reverse, first we move the instruction that should be prev_insn 
>> (jump) to its place, than the previous one, etc.
>> It doesn't consider moving debug instructions, so all of them are 
>> crowded before any non-debug instruction.  This certainly doesn't help 
>> much for debug information and on ppc64le causes
>> 
>> FAIL: gcc.dg/guality/loop-1.c   -O2  -DPREVENT_OPTIMIZATION  line 20 i 
>> == 1
>> 
>> and few other failures.  I never actually tried to fix those, maybe 
>> the best solution here is to drop all debug inside the loop when SMS 
>> succeeded.  But in this case we will also lose something - at least 
>> for now we have correct debug info when the loop was finished.
>> 
>> 
>> (2) The second issue is a "simple" -fcompare-debug failure, which 
>> actually can not lead to different code in .text (and can not cause 
>> bootstrap failure).  As I mentioned in previous mail it can be 
>> observed in pr42631.c example for ~10 years.  My previous patch and 
>> small attached patch2.txt (can be applied only after patch3.txt) 
>> solves this.  Note instructions "stick" to nearest following 
>> instruction and they are moved all together.  But when one compares 
>> -g0 and -g, the note can instead stick to a debug instruction, and 
>> according to (1) it will eventually "move up" together with that debug 
>> instruction instead of "sinking" with the following non-debug 
>> instruction.  An obvious solution is to extend the "sticking" approach 
>> to debug instructions themselves.  I've used that solution ("previous" 
>> patch) locally for ~3 years.  This change also affects the approach 
>> described in (1) but unfortunately doesn't fix any testcase and make 
>> something even worse:
>> 
>> FAIL: gcc.dg/guality/pr90716.c   -O1  -DPREVENT_OPTIMIZATION  line 23 
>> j + 1 == 9
>> 
>> So, IMHO we should not apply this in stage4.
>> 
>> (3) And the last one we have to fix now if we bother about 
>> -fmodulo-sched ppc64le bootstrap.  Failing examples are "gcc 
>> -fcompare-debug -O2 -fmodulo-sched --param sms-min-sc=1 
>> gcc/testsuite/gcc.dg/torture/pr87197.c" on ppc64le and "gcc 
>> -fcompare-debug -O2 gcc.c-torture/execute/pr70127.c" on aarch64.  
>> Contrary to scheduler itself, when building DDG graph we consider all 
>> debug instructions and their dependencies.  This may sometimes lead to 
>> a different result in sms_order_nodes function, as it depends on SCCs 
>> in DDG.  My previous ad-hoc solution was just to remove any 
>> "debug->non-debug" edges in DDG.  Now I reimplemented that by fully 
>> removing debug instructions nodes.  I've tested patch3.txt a lot last 
>> week in different scenarios and will commit it in a week if no 
>> objection.
>> 
>> 
> 
> I've noticed failures on arm and aarch64:
> FAIL: gcc.dg/torture/pr87197-debug-sms.c   -O3 -fomit-frame-pointer
> -funroll-loops -fpeel-loops -ftracer -finline-functions  (test for
> excess errors)
> Excess errors:
> xgcc: error: /gcc/testsuite/gcc.dg/torture/pr87197-debug-sms.c:
> '-fcompare-debug' failure
> 
> And on some arm targets:
> FAIL:  gcc.c-torture/execute/pr70127-debug-sms.c   -O3
> -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer
> -finline-functions  (test for excess errors)
> Excess errors:
> xgcc: error: /gcc/testsuite/gcc.c-torture/execute/pr70127-debug-sms.c:
> '-fcompare-debug' failure
> (--target arm-none-linux-gnueabihf --with-cpu cortex-a57 --with-fpu
> crypto-neon-fp-armv8)
> 
> Are they still expected?

Thank you for reporting!
I have just pushed a testsuite adjustment into master (r10-7445), and 
will give a brief explanation here few days later.


Roman
diff mbox series

Patch

diff --git a/gcc/ddg.c b/gcc/ddg.c
index ca8cb74823..048207a354 100644
--- a/gcc/ddg.c
+++ b/gcc/ddg.c
@@ -185,8 +185,8 @@  create_ddg_dep_from_intra_loop_link (ddg_ptr g, ddg_node_ptr src_node,
   else if (DEP_TYPE (link) == REG_DEP_OUTPUT)
     t = OUTPUT_DEP;
 
-  gcc_assert (!DEBUG_INSN_P (dest_node->insn) || t == ANTI_DEP);
-  gcc_assert (!DEBUG_INSN_P (src_node->insn) || t == ANTI_DEP);
+  gcc_assert (NONDEBUG_INSN_P (dest_node->insn) || t == ANTI_DEP);
+  gcc_assert (NONDEBUG_INSN_P (src_node->insn) || DEBUG_INSN_P (dest_node->insn));
 
   /* We currently choose not to create certain anti-deps edges and
      compensate for that by generating reg-moves based on the life-range
@@ -222,9 +222,9 @@  create_ddg_dep_from_intra_loop_link (ddg_ptr g, ddg_node_ptr src_node,
         }
     }
 
-   latency = dep_cost (link);
-   e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
-   add_edge_to_ddg (g, e);
+  latency = dep_cost (link);
+  e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
+  add_edge_to_ddg (g, e);
 }
 
 /* The same as the above function, but it doesn't require a link parameter.  */
@@ -237,8 +237,8 @@  create_ddg_dep_no_link (ddg_ptr g, ddg_node_ptr from, ddg_node_ptr to,
   enum reg_note dep_kind;
   struct _dep _dep, *dep = &_dep;
 
-  gcc_assert (!DEBUG_INSN_P (to->insn) || d_t == ANTI_DEP);
-  gcc_assert (!DEBUG_INSN_P (from->insn) || d_t == ANTI_DEP);
+  gcc_assert (NONDEBUG_INSN_P (to->insn) || d_t == ANTI_DEP);
+  gcc_assert (NONDEBUG_INSN_P (from->insn) || DEBUG_INSN_P (to->insn));
 
   if (d_t == ANTI_DEP)
     dep_kind = REG_DEP_ANTI;
@@ -455,8 +455,12 @@  add_inter_loop_mem_dep (ddg_ptr g, ddg_node_ptr from, ddg_node_ptr to)
 	return;
       else if (from->cuid != to->cuid)
 	{
-	  create_ddg_dep_no_link (g, from, to, ANTI_DEP, MEM_DEP, 1);
-	  if (DEBUG_INSN_P (from->insn) || DEBUG_INSN_P (to->insn))
+	  gcc_assert (NONDEBUG_INSN_P (to->insn));
+
+	  if (NONDEBUG_INSN_P (from->insn))
+	    create_ddg_dep_no_link (g, from, to, ANTI_DEP, MEM_DEP, 1);
+
+	  if (DEBUG_INSN_P (from->insn))
 	    create_ddg_dep_no_link (g, to, from, ANTI_DEP, MEM_DEP, 1);
 	  else
 	    create_ddg_dep_no_link (g, to, from, TRUE_DEP, MEM_DEP, 1);
@@ -607,28 +611,34 @@  create_ddg (basic_block bb, int closing_branch_deps)
       if (! INSN_P (insn))
 	{
 	  if (! first_note && NOTE_P (insn)
-	      && NOTE_KIND (insn) !=  NOTE_INSN_BASIC_BLOCK)
+	      && NOTE_KIND (insn) != NOTE_INSN_BASIC_BLOCK)
 	    first_note = insn;
 	  continue;
 	}
+
       if (JUMP_P (insn))
 	{
 	  gcc_assert (!g->closing_branch);
 	  g->closing_branch = &g->nodes[i];
 	}
-      else if (GET_CODE (PATTERN (insn)) == USE)
-	{
-	  if (! first_note)
-	    first_note = insn;
-	  continue;
-	}
+
+      if (! first_note)
+	first_note = insn;
+
+      if (GET_CODE (PATTERN (insn)) == USE)
+	continue;
 
       g->nodes[i].cuid = i;
       g->nodes[i].successors = sbitmap_alloc (num_nodes);
       bitmap_clear (g->nodes[i].successors);
       g->nodes[i].predecessors = sbitmap_alloc (num_nodes);
       bitmap_clear (g->nodes[i].predecessors);
-      g->nodes[i].first_note = (first_note ? first_note : insn);
+
+      if (! DEBUG_INSN_P (insn))
+	{
+	  g->nodes[i].first_note = first_note;
+	  first_note = NULL;
+	}
 
       g->nodes[i].aux.count = -1;
       g->nodes[i].max_dist = XCNEWVEC (int, num_nodes);
@@ -636,7 +646,6 @@  create_ddg (basic_block bb, int closing_branch_deps)
 	g->nodes[i].max_dist[j] = -1;
 
       g->nodes[i++].insn = insn;
-      first_note = NULL;
     }
 
   /* We must have found a branch in DDG.  */
@@ -1002,13 +1011,19 @@  order_sccs (ddg_all_sccs_ptr g)
 static void
 check_sccs (ddg_all_sccs_ptr sccs, int num_nodes)
 {
-  int i = 0;
+  int i;
+  unsigned int j;
   auto_sbitmap tmp (num_nodes);
 
   bitmap_clear (tmp);
   for (i = 0; i < sccs->num_sccs; i++)
     {
+      sbitmap_iterator sbi;
       gcc_assert (!bitmap_empty_p (sccs->sccs[i]->nodes));
+      /* Debug insns should not be in found SCCs.  */
+      EXECUTE_IF_SET_IN_BITMAP (sccs->sccs[i]->nodes, 0, j, sbi)
+	gcc_assert (NONDEBUG_INSN_P (sccs->ddg->nodes[j].insn));
+
       /* Verify that every node in sccs is in exactly one strongly
          connected component.  */
       gcc_assert (!bitmap_intersect_p (tmp, sccs->sccs[i]->nodes));
diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c
index e16e023243..3f6aa8c7c2 100644
--- a/gcc/modulo-sched.c
+++ b/gcc/modulo-sched.c
@@ -318,6 +318,7 @@  static rtx_insn *
 ps_first_note (partial_schedule_ptr ps, int id)
 {
   gcc_assert (id < ps->g->num_nodes);
+  gcc_assert (ps->g->nodes[id].first_note);
   return ps->g->nodes[id].first_note;
 }
 
diff --git a/gcc/testsuite/gcc.dg/pr42631-sms1.c b/gcc/testsuite/gcc.dg/pr42631-sms1.c
new file mode 100644
index 0000000000..0117276c73
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr42631-sms1.c
@@ -0,0 +1,9 @@ 
+/* { dg-do compile } */
+/* { dg-options "-g -O1 -funroll-loops -fmodulo-sched -fcompare-debug" } */
+/* { dg-xfail-if "" { powerpc-ibm-aix* } } */
+
+void foo()
+{
+  unsigned k;
+  while (--k > 0);
+}
diff --git a/gcc/testsuite/gcc.dg/pr42631-sms2.c b/gcc/testsuite/gcc.dg/pr42631-sms2.c
new file mode 100644
index 0000000000..2555da4522
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr42631-sms2.c
@@ -0,0 +1,9 @@ 
+/* { dg-do compile } */
+/* { dg-options "-g -O1 -funroll-loops -fmodulo-sched -fmodulo-sched-allow-regmoves -fcompare-debug" } */
+/* { dg-xfail-if "" { powerpc-ibm-aix* } } */
+
+void foo()
+{
+  unsigned k;
+  while (--k > 0);
+}