diff mbox

[2/2] shrink wrap a function with a single loop: split live_edge

Message ID CACgzC7AJUvPr_uQpMq7zS_570TKAqe12vp1T=D2s=7xUmmh6dA@mail.gmail.com
State New
Headers show

Commit Message

Zhenqiang Chen May 8, 2014, 8:07 a.m. UTC
Hi,

The patch splits the live_edge for move_insn_for_shrink_wrap to sink
the copy out of the entry block.

Bootstrap and no make check regression on X86-64 and ARM.

OK for trunk?

Thanks!
-Zhenqiang

ChangeLog:
2014-05-08  Zhenqiang Chen  <zhenqiang.chen@linaro.org>

        * function.c (next_block_for_reg): Allow live_edge->dest has two
        predecessors.
        (move_insn_for_shrink_wrap): Split live_edge.
        (prepre_shrink_wrap): One more parameter for move_insn_for_shrink_wrap.


   edge e, live_edge;
@@ -5415,10 +5415,12 @@ next_block_for_reg (basic_block bb, int regno,
int end_regno)
   if (live_edge->flags & EDGE_ABNORMAL)
     return NULL;

-  if (EDGE_COUNT (live_edge->dest->preds) > 1)
+  /* When live_edge->dest->preds == 2, we can create a new block on
+     the edge to make it meet the requirement.  */
+  if (EDGE_COUNT (live_edge->dest->preds) > 2)
     return NULL;

-  return live_edge->dest;
+  return live_edge;
 }

 /* Check whether INSN is the last insn in BB or
@@ -5545,20 +5547,25 @@ try_copy_prop (basic_block bb, rtx insn, rtx
src, rtx dest,
   return ret;
 }

- /* Try to move INSN from BB to a successor.  Return true on success.
-    USES and DEFS are the set of registers that are used and defined
-    after INSN in BB.  */
+/* Try to move INSN from BB to a successor.  Return true on success.
+   LAST_USES is the set of registers that are used by the COMPARE or JUMP
+   instructions in the block.  USES is the set of registers that are used
+   by others after INSN except COMARE and JUMP.  DEFS are the set of registers
+   that are used and defined others after INSN.  SPLIT_P indicates whether
+   a live edge from BB is splitted or not.  */

 static bool
 move_insn_for_shrink_wrap (basic_block bb, rtx insn,
                           const HARD_REG_SET uses,
                           const HARD_REG_SET defs,
-                          HARD_REG_SET *last_uses)
+                          HARD_REG_SET *last_uses,
+                          bool *split_p)
{
   rtx set, src, dest;
   bitmap live_out, live_in, bb_uses, bb_defs;
   unsigned int i, dregno, end_dregno, sregno, end_sregno;
   basic_block next_block;
+  edge live_edge;

   /* Look for a simple register copy.  */
   set = single_set (insn);
@@ -5582,17 +5589,31 @@ move_insn_for_shrink_wrap (basic_block bb, rtx insn,
       || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
     return false;

-  /* See whether there is a successor block to which we could move INSN.  */
-  next_block = next_block_for_reg (bb, dregno, end_dregno);
-  if (!next_block)
+  live_edge = next_block_for_reg (bb, dregno, end_dregno);
+  if (!live_edge)
      return false;

+  next_block = live_edge->dest;
+
   /* If the destination register is referred in later insn,
      try to forward it.  */
   if (overlaps_hard_reg_set_p (*last_uses, GET_MODE (dest), dregno)
       && !try_copy_prop (bb, insn, src, dest, last_uses))
     return false;

+  /* Create a new basic block on the edge.  */
+  if (EDGE_COUNT (next_block->preds) == 2)
+    {
+      next_block = split_edge (live_edge);
+
+      bitmap_copy (df_get_live_in (next_block), df_get_live_out (bb));
+      df_set_bb_dirty (next_block);
+
+      /* We should not split more than once for a function.  */
+      gcc_assert (!(*split_p));
+      *split_p = true;
+    }
+
   /* At this point we are committed to moving INSN, but let's try to
      move it as far as we can.  */
   do
@@ -5610,7 +5631,10 @@ move_insn_for_shrink_wrap (basic_block bb, rtx insn,
        {
          for (i = dregno; i < end_dregno; i++)
            {
-             if (REGNO_REG_SET_P (bb_uses, i) || REGNO_REG_SET_P (bb_defs, i)
+
+             if (*split_p
+                 || REGNO_REG_SET_P (bb_uses, i)
+                 || REGNO_REG_SET_P (bb_defs, i)
                  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
                next_block = NULL;
              CLEAR_REGNO_REG_SET (live_out, i);
@@ -5621,7 +5645,8 @@ move_insn_for_shrink_wrap (basic_block bb, rtx insn,
             Either way, SRC is now live on entry.  */
          for (i = sregno; i < end_sregno; i++)
            {
-             if (REGNO_REG_SET_P (bb_defs, i)
+             if (*split_p
+                 || REGNO_REG_SET_P (bb_defs, i)
                  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
                next_block = NULL;
              SET_REGNO_REG_SET (live_out, i);
@@ -5650,21 +5675,31 @@ move_insn_for_shrink_wrap (basic_block bb, rtx insn,
       /* If we don't need to add the move to BB, look for a single
         successor block.  */
       if (next_block)
-       next_block = next_block_for_reg (next_block, dregno, end_dregno);
+       {
+         live_edge = next_block_for_reg (next_block, dregno, end_dregno);
+         if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
+           break;
+         next_block = live_edge->dest;
+       }
     }
   while (next_block);

-  /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
-     (next loop).  */
-  for (i = dregno; i < end_dregno; i++)
+  /* For the new created basic block, there is no dataflow info at all.
+     So skip the following dataflow update and check.  */
+  if (!(*split_p))
     {
-      CLEAR_REGNO_REG_SET (bb_uses, i);
-      SET_REGNO_REG_SET (bb_defs, i);
-    }
+      /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
+        (next loop).  */
+      for (i = dregno; i < end_dregno; i++)
+       {
+         CLEAR_REGNO_REG_SET (bb_uses, i);
+         SET_REGNO_REG_SET (bb_defs, i);
+       }

-  /* BB now uses SRC.  */
-  for (i = sregno; i < end_sregno; i++)
-    SET_REGNO_REG_SET (bb_uses, i);
+      /* BB now uses SRC.  */
+      for (i = sregno; i < end_sregno; i++)
+       SET_REGNO_REG_SET (bb_uses, i);
+    }

   emit_insn_after (PATTERN (insn), bb_note (bb));
   delete_insn (insn);
@@ -5684,6 +5719,7 @@ prepare_shrink_wrap (basic_block entry_block)
   rtx insn, curr, x;
   HARD_REG_SET uses, defs, last_uses;
   df_ref *ref;
+  bool split_p = false;

   if (!JUMP_P (BB_END (entry_block)))
     return;
@@ -5693,7 +5729,7 @@ prepare_shrink_wrap (basic_block entry_block)
   FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
     if (NONDEBUG_INSN_P (insn)
        && !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
-                                      &last_uses))
+                                      &last_uses, &split_p))
       {
        /* Add all defined registers to DEFs.  */
        for (ref = DF_INSN_DEFS (insn); *ref; ref++)

Comments

Steven Bosscher May 8, 2014, 8:33 a.m. UTC | #1
On Thu, May 8, 2014 at 10:07 AM, Zhenqiang Chen wrote:
> The patch splits the live_edge for move_insn_for_shrink_wrap to sink
> the copy out of the entry block.

Maybe also time to take the shrink-wrapping code out of function.c and
put it in its own file?

Ciao!
Steven
Jeff Law May 9, 2014, 6:08 a.m. UTC | #2
On 05/08/14 02:07, Zhenqiang Chen wrote:
> Hi,
>
> The patch splits the live_edge for move_insn_for_shrink_wrap to sink
> the copy out of the entry block.
>
> Bootstrap and no make check regression on X86-64 and ARM.
>
> OK for trunk?
>
> Thanks!
> -Zhenqiang
>
> ChangeLog:
> 2014-05-08  Zhenqiang Chen  <zhenqiang.chen@linaro.org>
>
>          * function.c (next_block_for_reg): Allow live_edge->dest has two
>          predecessors.
>          (move_insn_for_shrink_wrap): Split live_edge.
>          (prepre_shrink_wrap): One more parameter for move_insn_for_shrink_wrap.
>
>
> diff --git a/gcc/function.c b/gcc/function.c
> index 764ac82..0be58e2 100644
> --- a/gcc/function.c
> +++ b/gcc/function.c
> @@ -5381,7 +5381,7 @@ requires_stack_frame_p (rtx insn, HARD_REG_SET
> prologue_used,
>      and if BB is its only predecessor.  Return that block if so,
>      otherwise return null.  */
>
> -static basic_block
> +static edge
>   next_block_for_reg (basic_block bb, int regno, int end_regno)
Comment for this function needs to be changed.  You're no longer 
returning a block, but the edge leading to the block.  It also seems the 
name of the function ought to change.

This looks basically OK.  I'd like to see the requested cleanups made, 
then the resulting new patch reposted for a final review.

Jeff
Zhenqiang Chen May 9, 2014, 6:43 a.m. UTC | #3
On 9 May 2014 14:08, Jeff Law <law@redhat.com> wrote:
> On 05/08/14 02:07, Zhenqiang Chen wrote:
>>
>> Hi,
>>
>> The patch splits the live_edge for move_insn_for_shrink_wrap to sink
>> the copy out of the entry block.
>>
>> Bootstrap and no make check regression on X86-64 and ARM.
>>
>> OK for trunk?
>>
>> Thanks!
>> -Zhenqiang
>>
>> ChangeLog:
>> 2014-05-08  Zhenqiang Chen  <zhenqiang.chen@linaro.org>
>>
>>          * function.c (next_block_for_reg): Allow live_edge->dest has two
>>          predecessors.
>>          (move_insn_for_shrink_wrap): Split live_edge.
>>          (prepre_shrink_wrap): One more parameter for
>> move_insn_for_shrink_wrap.
>>
>>
>> diff --git a/gcc/function.c b/gcc/function.c
>> index 764ac82..0be58e2 100644
>> --- a/gcc/function.c
>> +++ b/gcc/function.c
>> @@ -5381,7 +5381,7 @@ requires_stack_frame_p (rtx insn, HARD_REG_SET
>> prologue_used,
>>      and if BB is its only predecessor.  Return that block if so,
>>      otherwise return null.  */
>>
>> -static basic_block
>> +static edge
>>   next_block_for_reg (basic_block bb, int regno, int end_regno)
>
> Comment for this function needs to be changed.  You're no longer returning a
> block, but the edge leading to the block.  It also seems the name of the
> function ought to change.
>
> This looks basically OK.  I'd like to see the requested cleanups made, then
> the resulting new patch reposted for a final review.

Thank you for the comments. I will follow Steven's comments to
separate shrink-wrapping code from function.c to shrink-wrap.c.

-Zhenqiang
Jiong Wang Sept. 15, 2014, 3:28 p.m. UTC | #4
On 08/05/14 09:07, Zhenqiang Chen wrote:
>   static bool
>   move_insn_for_shrink_wrap (basic_block bb, rtx insn,
>                             const HARD_REG_SET uses,
>                             const HARD_REG_SET defs,
> -                          HARD_REG_SET *last_uses)
> +                          HARD_REG_SET *last_uses,
> +                          bool *split_p)
> {
>     rtx set, src, dest;
>     bitmap live_out, live_in, bb_uses, bb_defs;
>     unsigned int i, dregno, end_dregno, sregno, end_sregno;
>     basic_block next_block;
> +  edge live_edge;
>
>     /* Look for a simple register copy.  */
>     set = single_set (insn);
> @@ -5582,17 +5589,31 @@ move_insn_for_shrink_wrap (basic_block bb, rtx insn,
>         || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
>       return false;
>
> -  /* See whether there is a successor block to which we could move INSN.  */
> -  next_block = next_block_for_reg (bb, dregno, end_dregno);
> -  if (!next_block)
> +  live_edge = next_block_for_reg (bb, dregno, end_dregno);
> +  if (!live_edge)
>        return false;
>
> +  next_block = live_edge->dest;
> +
>     /* If the destination register is referred in later insn,
>        try to forward it.  */
>     if (overlaps_hard_reg_set_p (*last_uses, GET_MODE (dest), dregno)
>         && !try_copy_prop (bb, insn, src, dest, last_uses))
>       return false;
>
> +  /* Create a new basic block on the edge.  */
> +  if (EDGE_COUNT (next_block->preds) == 2)
> +    {
> +      next_block = split_edge (live_edge);
> +
> +      bitmap_copy (df_get_live_in (next_block), df_get_live_out (bb));

(re-sent, looks like the first send not received by the server...)

  for the function "_IO_wdefault_xsputn" in glibc wgenops.c (.dot file attached)

  174: NOTE_INSN_BASIC_BLOCK 21 are the new created basic block because of the sink of move
instruction "ax:DI = 0" and split edge.

  but the live_in of this BB is copied from live_out of BB 2 which is too big, and
actually prevent the later sink of "16: r12:DI=dx:DI".

  Should it be better to copy live_in from "next_block->next_bb" instead of live_out from "bb",
as it will model what's needed more accurately?

+      bitmap_copy (df_get_live_in (next_block), df_get_live_in (next_block->next_bb));

  After this modification, pass x86-64 bootstrap, and this function could be shrink-wrapped.

-- Jiong

> +      df_set_bb_dirty (next_block);
> +
> +      /* We should not split more than once for a function.  */
> +      gcc_assert (!(*split_p));
> +      *split_p = true;
> +    }
> +
>     /* At this point we are committed to moving INSN, but let's try to
>        move it as far as we can.  */
>     do
> @@ -5610,7 +5631,10 @@ move_insn_for_shrink_wrap (basic_block bb, rtx insn,
>          {
>            for (i = dregno; i < end_dregno; i++)
>              {
> -             if (REGNO_REG_SET_P (bb_uses, i) || REGNO_REG_SET_P (bb_defs, i)
> +
> +             if (*split_p
> +                 || REGNO_REG_SET_P (bb_uses, i)
> +                 || REGNO_REG_SET_P (bb_defs, i)
>                    || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
>                  next_block = NULL;
>                CLEAR_REGNO_REG_SET (live_out, i);
> @@ -5621,7 +5645,8 @@ move_insn_for_shrink_wrap (basic_block bb, rtx insn,
>               Either way, SRC is now live on entry.  */
>            for (i = sregno; i < end_sregno; i++)
>              {
> -             if (REGNO_REG_SET_P (bb_defs, i)
> +             if (*split_p
> +                 || REGNO_REG_SET_P (bb_defs, i)
>                    || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
>                  next_block = NULL;
>                SET_REGNO_REG_SET (live_out, i);
> @@ -5650,21 +5675,31 @@ move_insn_for_shrink_wrap (basic_block bb, rtx insn,
>         /* If we don't need to add the move to BB, look for a single
>           successor block.  */
>         if (next_block)
> -       next_block = next_block_for_reg (next_block, dregno, end_dregno);
> +       {
> +         live_edge = next_block_for_reg (next_block, dregno, end_dregno);
> +         if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
> +           break;
> +         next_block = live_edge->dest;
> +       }
>       }
>     while (next_block);
>
> -  /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
> -     (next loop).  */
> -  for (i = dregno; i < end_dregno; i++)
> +  /* For the new created basic block, there is no dataflow info at all.
> +     So skip the following dataflow update and check.  */
> +  if (!(*split_p))
>       {
> -      CLEAR_REGNO_REG_SET (bb_uses, i);
> -      SET_REGNO_REG_SET (bb_defs, i);
> -    }
> +      /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
> +        (next loop).  */
> +      for (i = dregno; i < end_dregno; i++)
> +       {
> +         CLEAR_REGNO_REG_SET (bb_uses, i);
> +         SET_REGNO_REG_SET (bb_defs, i);
> +       }
>
> -  /* BB now uses SRC.  */
> -  for (i = sregno; i < end_sregno; i++)
> -    SET_REGNO_REG_SET (bb_uses, i);
> +      /* BB now uses SRC.  */
> +      for (i = sregno; i < end_sregno; i++)
> +       SET_REGNO_REG_SET (bb_uses, i);
> +    }
>
>     emit_insn_after (PATTERN (insn), bb_note (bb));
>     delete_insn (insn);
> @@ -5684,6 +5719,7 @@ prepare_shrink_wrap (basic_block entry_block)
>     rtx insn, curr, x;
>     HARD_REG_SET uses, defs, last_uses;
>     df_ref *ref;
> +  bool split_p = false;
>
>     if (!JUMP_P (BB_END (entry_block)))
>       return;
> @@ -5693,7 +5729,7 @@ prepare_shrink_wrap (basic_block entry_block)
>     FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
>       if (NONDEBUG_INSN_P (insn)
>          && !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
> -                                      &last_uses))
> +                                      &last_uses, &split_p))
>         {
>          /* Add all defined registers to DEFs.  */
>          for (ref = DF_INSN_DEFS (insn); *ref; ref++)
>
Zhenqiang Chen Sept. 16, 2014, 6:55 a.m. UTC | #5
> -----Original Message-----
> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
> owner@gcc.gnu.org] On Behalf Of Jiong Wang
> Sent: Monday, September 15, 2014 11:28 PM
> To: Zhenqiang Chen
> Cc: gcc-patches@gcc.gnu.org; Jeff Law
> Subject: Re: [PATCH, 2/2] shrink wrap a function with a single loop: split
> live_edge
> 
> On 08/05/14 09:07, Zhenqiang Chen wrote:
> >   static bool
> >   move_insn_for_shrink_wrap (basic_block bb, rtx insn,
> >                             const HARD_REG_SET uses,
> >                             const HARD_REG_SET defs,
> > -                          HARD_REG_SET *last_uses)
> > +                          HARD_REG_SET *last_uses,
> > +                          bool *split_p)
> > {
> >     rtx set, src, dest;
> >     bitmap live_out, live_in, bb_uses, bb_defs;
> >     unsigned int i, dregno, end_dregno, sregno, end_sregno;
> >     basic_block next_block;
> > +  edge live_edge;
> >
> >     /* Look for a simple register copy.  */
> >     set = single_set (insn);
> > @@ -5582,17 +5589,31 @@ move_insn_for_shrink_wrap (basic_block bb,
> rtx insn,
> >         || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
> >       return false;
> >
> > -  /* See whether there is a successor block to which we could move
> > INSN.  */
> > -  next_block = next_block_for_reg (bb, dregno, end_dregno);
> > -  if (!next_block)
> > +  live_edge = next_block_for_reg (bb, dregno, end_dregno);  if
> > + (!live_edge)
> >        return false;
> >
> > +  next_block = live_edge->dest;
> > +
> >     /* If the destination register is referred in later insn,
> >        try to forward it.  */
> >     if (overlaps_hard_reg_set_p (*last_uses, GET_MODE (dest), dregno)
> >         && !try_copy_prop (bb, insn, src, dest, last_uses))
> >       return false;
> >
> > +  /* Create a new basic block on the edge.  */  if (EDGE_COUNT
> > + (next_block->preds) == 2)
> > +    {
> > +      next_block = split_edge (live_edge);
> > +
> > +      bitmap_copy (df_get_live_in (next_block), df_get_live_out
> > + (bb));
> 
> (re-sent, looks like the first send not received by the server...)
> 
>   for the function "_IO_wdefault_xsputn" in glibc wgenops.c (.dot file
> attached)
> 
>   174: NOTE_INSN_BASIC_BLOCK 21 are the new created basic block because
> of the sink of move instruction "ax:DI = 0" and split edge.
> 
>   but the live_in of this BB is copied from live_out of BB 2 which is too big, and
> actually prevent the later sink of "16: r12:DI=dx:DI".
> 
>   Should it be better to copy live_in from "next_block->next_bb" instead of
> live_out from "bb", as it will model what's needed more accurately?

According to the algorithm, next_block->next_bb (which is live_edge->dest) should have two predecessors. Live_in of next_block->next_bb would include live_out of the other edge,  which is not necessary accurately. To be more accurate, you may need an intersection of df_get_live_out (bb) and df_get_live_in (next_block->next_bb).

Thanks!
-Zhenqiang
 
> +      bitmap_copy (df_get_live_in (next_block), df_get_live_in
> + (next_block->next_bb));
> 
>   After this modification, pass x86-64 bootstrap, and this function could be
> shrink-wrapped.
> 
> -- Jiong
> 
> > +      df_set_bb_dirty (next_block);
> > +
> > +      /* We should not split more than once for a function.  */
> > +      gcc_assert (!(*split_p));
> > +      *split_p = true;
> > +    }
> > +
> >     /* At this point we are committed to moving INSN, but let's try to
> >        move it as far as we can.  */
> >     do
> > @@ -5610,7 +5631,10 @@ move_insn_for_shrink_wrap (basic_block bb,
> rtx insn,
> >          {
> >            for (i = dregno; i < end_dregno; i++)
> >              {
> > -             if (REGNO_REG_SET_P (bb_uses, i) || REGNO_REG_SET_P
> (bb_defs, i)
> > +
> > +             if (*split_p
> > +                 || REGNO_REG_SET_P (bb_uses, i)
> > +                 || REGNO_REG_SET_P (bb_defs, i)
> >                    || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
> >                  next_block = NULL;
> >                CLEAR_REGNO_REG_SET (live_out, i); @@ -5621,7 +5645,8
> > @@ move_insn_for_shrink_wrap (basic_block bb, rtx insn,
> >               Either way, SRC is now live on entry.  */
> >            for (i = sregno; i < end_sregno; i++)
> >              {
> > -             if (REGNO_REG_SET_P (bb_defs, i)
> > +             if (*split_p
> > +                 || REGNO_REG_SET_P (bb_defs, i)
> >                    || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
> >                  next_block = NULL;
> >                SET_REGNO_REG_SET (live_out, i); @@ -5650,21 +5675,31
> > @@ move_insn_for_shrink_wrap (basic_block bb, rtx insn,
> >         /* If we don't need to add the move to BB, look for a single
> >           successor block.  */
> >         if (next_block)
> > -       next_block = next_block_for_reg (next_block, dregno, end_dregno);
> > +       {
> > +         live_edge = next_block_for_reg (next_block, dregno, end_dregno);
> > +         if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
> > +           break;
> > +         next_block = live_edge->dest;
> > +       }
> >       }
> >     while (next_block);
> >
> > -  /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
> > -     (next loop).  */
> > -  for (i = dregno; i < end_dregno; i++)
> > +  /* For the new created basic block, there is no dataflow info at all.
> > +     So skip the following dataflow update and check.  */  if
> > + (!(*split_p))
> >       {
> > -      CLEAR_REGNO_REG_SET (bb_uses, i);
> > -      SET_REGNO_REG_SET (bb_defs, i);
> > -    }
> > +      /* BB now defines DEST.  It only uses the parts of DEST that overlap
> SRC
> > +        (next loop).  */
> > +      for (i = dregno; i < end_dregno; i++)
> > +       {
> > +         CLEAR_REGNO_REG_SET (bb_uses, i);
> > +         SET_REGNO_REG_SET (bb_defs, i);
> > +       }
> >
> > -  /* BB now uses SRC.  */
> > -  for (i = sregno; i < end_sregno; i++)
> > -    SET_REGNO_REG_SET (bb_uses, i);
> > +      /* BB now uses SRC.  */
> > +      for (i = sregno; i < end_sregno; i++)
> > +       SET_REGNO_REG_SET (bb_uses, i);
> > +    }
> >
> >     emit_insn_after (PATTERN (insn), bb_note (bb));
> >     delete_insn (insn);
> > @@ -5684,6 +5719,7 @@ prepare_shrink_wrap (basic_block entry_block)
> >     rtx insn, curr, x;
> >     HARD_REG_SET uses, defs, last_uses;
> >     df_ref *ref;
> > +  bool split_p = false;
> >
> >     if (!JUMP_P (BB_END (entry_block)))
> >       return;
> > @@ -5693,7 +5729,7 @@ prepare_shrink_wrap (basic_block entry_block)
> >     FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
> >       if (NONDEBUG_INSN_P (insn)
> >          && !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
> > -                                      &last_uses))
> > +                                      &last_uses, &split_p))
> >         {
> >          /* Add all defined registers to DEFs.  */
> >          for (ref = DF_INSN_DEFS (insn); *ref; ref++)
> >
Jeff Law Sept. 19, 2014, 5:51 a.m. UTC | #6
On 09/16/14 00:55, Zhenqiang Chen wrote:
>
>
>> -----Original Message-----
>> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
>> owner@gcc.gnu.org] On Behalf Of Jiong Wang
>> Sent: Monday, September 15, 2014 11:28 PM
>> To: Zhenqiang Chen
>> Cc: gcc-patches@gcc.gnu.org; Jeff Law
>> Subject: Re: [PATCH, 2/2] shrink wrap a function with a single loop: split
>> live_edge
>>
>> On 08/05/14 09:07, Zhenqiang Chen wrote:
>>>    static bool
>>>    move_insn_for_shrink_wrap (basic_block bb, rtx insn,
>>>                              const HARD_REG_SET uses,
>>>                              const HARD_REG_SET defs,
>>> -                          HARD_REG_SET *last_uses)
>>> +                          HARD_REG_SET *last_uses,
>>> +                          bool *split_p)
>>> {
>>>      rtx set, src, dest;
>>>      bitmap live_out, live_in, bb_uses, bb_defs;
>>>      unsigned int i, dregno, end_dregno, sregno, end_sregno;
>>>      basic_block next_block;
>>> +  edge live_edge;
>>>
>>>      /* Look for a simple register copy.  */
>>>      set = single_set (insn);
>>> @@ -5582,17 +5589,31 @@ move_insn_for_shrink_wrap (basic_block bb,
>> rtx insn,
>>>          || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
>>>        return false;
>>>
>>> -  /* See whether there is a successor block to which we could move
>>> INSN.  */
>>> -  next_block = next_block_for_reg (bb, dregno, end_dregno);
>>> -  if (!next_block)
>>> +  live_edge = next_block_for_reg (bb, dregno, end_dregno);  if
>>> + (!live_edge)
>>>         return false;
>>>
>>> +  next_block = live_edge->dest;
>>> +
>>>      /* If the destination register is referred in later insn,
>>>         try to forward it.  */
>>>      if (overlaps_hard_reg_set_p (*last_uses, GET_MODE (dest), dregno)
>>>          && !try_copy_prop (bb, insn, src, dest, last_uses))
>>>        return false;
>>>
>>> +  /* Create a new basic block on the edge.  */  if (EDGE_COUNT
>>> + (next_block->preds) == 2)
>>> +    {
>>> +      next_block = split_edge (live_edge);
>>> +
>>> +      bitmap_copy (df_get_live_in (next_block), df_get_live_out
>>> + (bb));
>>
>> (re-sent, looks like the first send not received by the server...)
>>
>>    for the function "_IO_wdefault_xsputn" in glibc wgenops.c (.dot file
>> attached)
>>
>>    174: NOTE_INSN_BASIC_BLOCK 21 are the new created basic block because
>> of the sink of move instruction "ax:DI = 0" and split edge.
>>
>>    but the live_in of this BB is copied from live_out of BB 2 which is too big, and
>> actually prevent the later sink of "16: r12:DI=dx:DI".
>>
>>    Should it be better to copy live_in from "next_block->next_bb" instead of
>> live_out from "bb", as it will model what's needed more accurately?
>
> According to the algorithm, next_block->next_bb (which is live_edge->dest) should have two predecessors. Live_in of next_block->next_bb would include live_out of the other edge,  which is not necessary accurately. To be more accurate, you may need an intersection of df_get_live_out (bb) and df_get_live_in (next_block->next_bb).
>
Presumably we can't recompute the liveness info here as it's too 
expensive to do that each time we split an edge to sink a statement? 
ie, we need the more accurate liveness within this pass so that we can 
sink further instructions?

jeff
Jiong Wang Sept. 19, 2014, 11:27 a.m. UTC | #7
On 19/09/14 06:51, Jeff Law wrote:
> On 09/16/14 00:55, Zhenqiang Chen wrote:
>>
>>> -----Original Message-----
>>> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
>>> owner@gcc.gnu.org] On Behalf Of Jiong Wang
>>> Sent: Monday, September 15, 2014 11:28 PM
>>> To: Zhenqiang Chen
>>> Cc: gcc-patches@gcc.gnu.org; Jeff Law
>>> Subject: Re: [PATCH, 2/2] shrink wrap a function with a single loop: split
>>> live_edge
>>>
>>> On 08/05/14 09:07, Zhenqiang Chen wrote:
>>>>     static bool
>>>>     move_insn_for_shrink_wrap (basic_block bb, rtx insn,
>>>>                               const HARD_REG_SET uses,
>>>>                               const HARD_REG_SET defs,
>>>> -                          HARD_REG_SET *last_uses)
>>>> +                          HARD_REG_SET *last_uses,
>>>> +                          bool *split_p)
>>>> {
>>>>       rtx set, src, dest;
>>>>       bitmap live_out, live_in, bb_uses, bb_defs;
>>>>       unsigned int i, dregno, end_dregno, sregno, end_sregno;
>>>>       basic_block next_block;
>>>> +  edge live_edge;
>>>>
>>>>       /* Look for a simple register copy.  */
>>>>       set = single_set (insn);
>>>> @@ -5582,17 +5589,31 @@ move_insn_for_shrink_wrap (basic_block bb,
>>> rtx insn,
>>>>           || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
>>>>         return false;
>>>>
>>>> -  /* See whether there is a successor block to which we could move
>>>> INSN.  */
>>>> -  next_block = next_block_for_reg (bb, dregno, end_dregno);
>>>> -  if (!next_block)
>>>> +  live_edge = next_block_for_reg (bb, dregno, end_dregno);  if
>>>> + (!live_edge)
>>>>          return false;
>>>>
>>>> +  next_block = live_edge->dest;
>>>> +
>>>>       /* If the destination register is referred in later insn,
>>>>          try to forward it.  */
>>>>       if (overlaps_hard_reg_set_p (*last_uses, GET_MODE (dest), dregno)
>>>>           && !try_copy_prop (bb, insn, src, dest, last_uses))
>>>>         return false;
>>>>
>>>> +  /* Create a new basic block on the edge.  */  if (EDGE_COUNT
>>>> + (next_block->preds) == 2)
>>>> +    {
>>>> +      next_block = split_edge (live_edge);
>>>> +
>>>> +      bitmap_copy (df_get_live_in (next_block), df_get_live_out
>>>> + (bb));
>>> (re-sent, looks like the first send not received by the server...)
>>>
>>>     for the function "_IO_wdefault_xsputn" in glibc wgenops.c (.dot file
>>> attached)
>>>
>>>     174: NOTE_INSN_BASIC_BLOCK 21 are the new created basic block because
>>> of the sink of move instruction "ax:DI = 0" and split edge.
>>>
>>>     but the live_in of this BB is copied from live_out of BB 2 which is too big, and
>>> actually prevent the later sink of "16: r12:DI=dx:DI".
>>>
>>>     Should it be better to copy live_in from "next_block->next_bb" instead of
>>> live_out from "bb", as it will model what's needed more accurately?
>> According to the algorithm, next_block->next_bb (which is live_edge->dest) should have two predecessors. Live_in of next_block->next_bb would include live_out of the other edge,  which is not necessary accurately. To be more accurate, you may need an intersection of df_get_live_out (bb) and df_get_live_in (next_block->next_bb).
>>
> Presumably we can't recompute the liveness info here as it's too
> expensive to do that each time we split an edge to sink a statement?
> ie, we need the more accurate liveness within this pass so that we can
> sink further instructions?

for a single case, x86 bootstrap, looks like these code are not compile time critical
as for all 4762 live edges which could sink instructions

4139: EDGE_COUNT (next_block->preds) == 1
270: EDGE_COUNT (next_block->preds) == 2
353: EDGE_COUNT (next_block->preds) > 2

and if we make the live info more accurate here, just very few additional functions (< 10)
shrink-wrapped from glibc build & gcc bootstrap test.

So, is it OK we simply change "bitmap_copy  (df_get_live_in  (next_block),  df_get_live_out  (bb));"
into "bitmap_and  (df_get_live_in  (next_block),  df_get_live_out  (bb),  df_get_live_in  (next_block->next_bb));" ?

-- Jiong

>
> jeff
>
>
Jeff Law Sept. 19, 2014, 3:49 p.m. UTC | #8
On 09/19/14 05:27, Jiong Wang wrote:
>
> On 19/09/14 06:51, Jeff Law wrote:
>> On 09/16/14 00:55, Zhenqiang Chen wrote:
>>>
>>>> -----Original Message-----
>>>> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
>>>> owner@gcc.gnu.org] On Behalf Of Jiong Wang
>>>> Sent: Monday, September 15, 2014 11:28 PM
>>>> To: Zhenqiang Chen
>>>> Cc: gcc-patches@gcc.gnu.org; Jeff Law
>>>> Subject: Re: [PATCH, 2/2] shrink wrap a function with a single loop:
>>>> split
>>>> live_edge
>>>>
>>>> On 08/05/14 09:07, Zhenqiang Chen wrote:
>>>>>     static bool
>>>>>     move_insn_for_shrink_wrap (basic_block bb, rtx insn,
>>>>>                               const HARD_REG_SET uses,
>>>>>                               const HARD_REG_SET defs,
>>>>> -                          HARD_REG_SET *last_uses)
>>>>> +                          HARD_REG_SET *last_uses,
>>>>> +                          bool *split_p)
>>>>> {
>>>>>       rtx set, src, dest;
>>>>>       bitmap live_out, live_in, bb_uses, bb_defs;
>>>>>       unsigned int i, dregno, end_dregno, sregno, end_sregno;
>>>>>       basic_block next_block;
>>>>> +  edge live_edge;
>>>>>
>>>>>       /* Look for a simple register copy.  */
>>>>>       set = single_set (insn);
>>>>> @@ -5582,17 +5589,31 @@ move_insn_for_shrink_wrap (basic_block bb,
>>>> rtx insn,
>>>>>           || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
>>>>>         return false;
>>>>>
>>>>> -  /* See whether there is a successor block to which we could move
>>>>> INSN.  */
>>>>> -  next_block = next_block_for_reg (bb, dregno, end_dregno);
>>>>> -  if (!next_block)
>>>>> +  live_edge = next_block_for_reg (bb, dregno, end_dregno);  if
>>>>> + (!live_edge)
>>>>>          return false;
>>>>>
>>>>> +  next_block = live_edge->dest;
>>>>> +
>>>>>       /* If the destination register is referred in later insn,
>>>>>          try to forward it.  */
>>>>>       if (overlaps_hard_reg_set_p (*last_uses, GET_MODE (dest),
>>>>> dregno)
>>>>>           && !try_copy_prop (bb, insn, src, dest, last_uses))
>>>>>         return false;
>>>>>
>>>>> +  /* Create a new basic block on the edge.  */  if (EDGE_COUNT
>>>>> + (next_block->preds) == 2)
>>>>> +    {
>>>>> +      next_block = split_edge (live_edge);
>>>>> +
>>>>> +      bitmap_copy (df_get_live_in (next_block), df_get_live_out
>>>>> + (bb));
>>>> (re-sent, looks like the first send not received by the server...)
>>>>
>>>>     for the function "_IO_wdefault_xsputn" in glibc wgenops.c (.dot
>>>> file
>>>> attached)
>>>>
>>>>     174: NOTE_INSN_BASIC_BLOCK 21 are the new created basic block
>>>> because
>>>> of the sink of move instruction "ax:DI = 0" and split edge.
>>>>
>>>>     but the live_in of this BB is copied from live_out of BB 2 which
>>>> is too big, and
>>>> actually prevent the later sink of "16: r12:DI=dx:DI".
>>>>
>>>>     Should it be better to copy live_in from "next_block->next_bb"
>>>> instead of
>>>> live_out from "bb", as it will model what's needed more accurately?
>>> According to the algorithm, next_block->next_bb (which is
>>> live_edge->dest) should have two predecessors. Live_in of
>>> next_block->next_bb would include live_out of the other edge,  which
>>> is not necessary accurately. To be more accurate, you may need an
>>> intersection of df_get_live_out (bb) and df_get_live_in
>>> (next_block->next_bb).
>>>
>> Presumably we can't recompute the liveness info here as it's too
>> expensive to do that each time we split an edge to sink a statement?
>> ie, we need the more accurate liveness within this pass so that we can
>> sink further instructions?
>
> for a single case, x86 bootstrap, looks like these code are not compile
> time critical
> as for all 4762 live edges which could sink instructions
>
> 4139: EDGE_COUNT (next_block->preds) == 1
> 270: EDGE_COUNT (next_block->preds) == 2
> 353: EDGE_COUNT (next_block->preds) > 2
>
> and if we make the live info more accurate here, just very few
> additional functions (< 10)
> shrink-wrapped from glibc build & gcc bootstrap test.
Perhaps I wasn't clear.

The whole point behind the proposed changes is to enable additional 
sinking that is currently blocked because of the overly large set of 
live objects.

So my question was an attempt to understand why we didn't just a normal 
liveness update -- I speculated that we aren't doing a normal liveness 
update because of the compile-time cost.



>
> So, is it OK we simply change "bitmap_copy  (df_get_live_in
> (next_block),  df_get_live_out  (bb));"
> into "bitmap_and  (df_get_live_in  (next_block),  df_get_live_out
> (bb),  df_get_live_in  (next_block->next_bb));" ?
Probably.  Though I'd be a bit concerned with next_block->next_bb. 
Wouldn't it be safer to stash away the relevant basic block prior to the 
call to split_edge, then use that saved copy.  Something like this 
(untested):

basic_block old_dest = live_edge->dest;
next_block = split_edge (live_edge);

/* We create a new basic block.  Call df_grow_bb_info to make sure
    all data structures are allocated.  */
df_grow_bb_info (df_live);
bitmap_and (df_get_live_in (next_block),
             df_get_live_out (bb),
             df_get_live_in (old_dest));


The idea being we don't want to depend on the precise ordering blocks in 
the block chain.

Could you try that and see if it does what you need?

jeff
Jiong Wang Sept. 19, 2014, 4:02 p.m. UTC | #9
On 19/09/14 16:49, Jeff Law wrote:
> On 09/19/14 05:27, Jiong Wang wrote:
>> On 19/09/14 06:51, Jeff Law wrote:
>>> On 09/16/14 00:55, Zhenqiang Chen wrote:
>>>>> -----Original Message-----
>>>>> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
>>>>> owner@gcc.gnu.org] On Behalf Of Jiong Wang
>>>>> Sent: Monday, September 15, 2014 11:28 PM
>>>>> To: Zhenqiang Chen
>>>>> Cc: gcc-patches@gcc.gnu.org; Jeff Law
>>>>> Subject: Re: [PATCH, 2/2] shrink wrap a function with a single loop:
>>>>> split
>>>>> live_edge
>>>>>
>>>>> On 08/05/14 09:07, Zhenqiang Chen wrote:
>>>>>>      static bool
>>>>>>      move_insn_for_shrink_wrap (basic_block bb, rtx insn,
>>>>>>                                const HARD_REG_SET uses,
>>>>>>                                const HARD_REG_SET defs,
>>>>>> -                          HARD_REG_SET *last_uses)
>>>>>> +                          HARD_REG_SET *last_uses,
>>>>>> +                          bool *split_p)
>>>>>> {
>>>>>>        rtx set, src, dest;
>>>>>>        bitmap live_out, live_in, bb_uses, bb_defs;
>>>>>>        unsigned int i, dregno, end_dregno, sregno, end_sregno;
>>>>>>        basic_block next_block;
>>>>>> +  edge live_edge;
>>>>>>
>>>>>>        /* Look for a simple register copy.  */
>>>>>>        set = single_set (insn);
>>>>>> @@ -5582,17 +5589,31 @@ move_insn_for_shrink_wrap (basic_block bb,
>>>>> rtx insn,
>>>>>>            || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
>>>>>>          return false;
>>>>>>
>>>>>> -  /* See whether there is a successor block to which we could move
>>>>>> INSN.  */
>>>>>> -  next_block = next_block_for_reg (bb, dregno, end_dregno);
>>>>>> -  if (!next_block)
>>>>>> +  live_edge = next_block_for_reg (bb, dregno, end_dregno);  if
>>>>>> + (!live_edge)
>>>>>>           return false;
>>>>>>
>>>>>> +  next_block = live_edge->dest;
>>>>>> +
>>>>>>        /* If the destination register is referred in later insn,
>>>>>>           try to forward it.  */
>>>>>>        if (overlaps_hard_reg_set_p (*last_uses, GET_MODE (dest),
>>>>>> dregno)
>>>>>>            && !try_copy_prop (bb, insn, src, dest, last_uses))
>>>>>>          return false;
>>>>>>
>>>>>> +  /* Create a new basic block on the edge.  */  if (EDGE_COUNT
>>>>>> + (next_block->preds) == 2)
>>>>>> +    {
>>>>>> +      next_block = split_edge (live_edge);
>>>>>> +
>>>>>> +      bitmap_copy (df_get_live_in (next_block), df_get_live_out
>>>>>> + (bb));
>>>>> (re-sent, looks like the first send not received by the server...)
>>>>>
>>>>>      for the function "_IO_wdefault_xsputn" in glibc wgenops.c (.dot
>>>>> file
>>>>> attached)
>>>>>
>>>>>      174: NOTE_INSN_BASIC_BLOCK 21 are the new created basic block
>>>>> because
>>>>> of the sink of move instruction "ax:DI = 0" and split edge.
>>>>>
>>>>>      but the live_in of this BB is copied from live_out of BB 2 which
>>>>> is too big, and
>>>>> actually prevent the later sink of "16: r12:DI=dx:DI".
>>>>>
>>>>>      Should it be better to copy live_in from "next_block->next_bb"
>>>>> instead of
>>>>> live_out from "bb", as it will model what's needed more accurately?
>>>> According to the algorithm, next_block->next_bb (which is
>>>> live_edge->dest) should have two predecessors. Live_in of
>>>> next_block->next_bb would include live_out of the other edge,  which
>>>> is not necessary accurately. To be more accurate, you may need an
>>>> intersection of df_get_live_out (bb) and df_get_live_in
>>>> (next_block->next_bb).
>>>>
>>> Presumably we can't recompute the liveness info here as it's too
>>> expensive to do that each time we split an edge to sink a statement?
>>> ie, we need the more accurate liveness within this pass so that we can
>>> sink further instructions?
>> for a single case, x86 bootstrap, looks like these code are not compile
>> time critical
>> as for all 4762 live edges which could sink instructions
>>
>> 4139: EDGE_COUNT (next_block->preds) == 1
>> 270: EDGE_COUNT (next_block->preds) == 2
>> 353: EDGE_COUNT (next_block->preds) > 2
>>
>> and if we make the live info more accurate here, just very few
>> additional functions (< 10)
>> shrink-wrapped from glibc build & gcc bootstrap test.
> Perhaps I wasn't clear.
>
> The whole point behind the proposed changes is to enable additional
> sinking that is currently blocked because of the overly large set of
> live objects.
>
> So my question was an attempt to understand why we didn't just a normal
> liveness update -- I speculated that we aren't doing a normal liveness
> update because of the compile-time cost.
>
>
>
>> So, is it OK we simply change "bitmap_copy  (df_get_live_in
>> (next_block),  df_get_live_out  (bb));"
>> into "bitmap_and  (df_get_live_in  (next_block),  df_get_live_out
>> (bb),  df_get_live_in  (next_block->next_bb));" ?
> Probably.  Though I'd be a bit concerned with next_block->next_bb.
> Wouldn't it be safer to stash away the relevant basic block prior to the
> call to split_edge, then use that saved copy.  Something like this
> (untested):
>
> basic_block old_dest = live_edge->dest;
> next_block = split_edge (live_edge);
>
> /* We create a new basic block.  Call df_grow_bb_info to make sure
>      all data structures are allocated.  */
> df_grow_bb_info (df_live);
> bitmap_and (df_get_live_in (next_block),
>               df_get_live_out (bb),
>               df_get_live_in (old_dest));
>
>
> The idea being we don't want to depend on the precise ordering blocks in
> the block chain.
>
> Could you try that and see if it does what you need?
Jeff,

   Thanks, verified, it works.

Regards,
Jiong
>
> jeff
>
>
Jeff Law Sept. 19, 2014, 4:19 p.m. UTC | #10
On 09/19/14 10:02, Jiong Wang wrote:
>
> On 19/09/14 16:49, Jeff Law wrote:
>> On 09/19/14 05:27, Jiong Wang wrote:
>>> On 19/09/14 06:51, Jeff Law wrote:
>>>> On 09/16/14 00:55, Zhenqiang Chen wrote:
>>>>>> -----Original Message-----
>>>>>> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
>>>>>> owner@gcc.gnu.org] On Behalf Of Jiong Wang
>>>>>> Sent: Monday, September 15, 2014 11:28 PM
>>>>>> To: Zhenqiang Chen
>>>>>> Cc: gcc-patches@gcc.gnu.org; Jeff Law
>>>>>> Subject: Re: [PATCH, 2/2] shrink wrap a function with a single loop:
>>>>>> split
>>>>>> live_edge
>>>>>>
>>>>>> On 08/05/14 09:07, Zhenqiang Chen wrote:
>>>>>>>      static bool
>>>>>>>      move_insn_for_shrink_wrap (basic_block bb, rtx insn,
>>>>>>>                                const HARD_REG_SET uses,
>>>>>>>                                const HARD_REG_SET defs,
>>>>>>> -                          HARD_REG_SET *last_uses)
>>>>>>> +                          HARD_REG_SET *last_uses,
>>>>>>> +                          bool *split_p)
>>>>>>> {
>>>>>>>        rtx set, src, dest;
>>>>>>>        bitmap live_out, live_in, bb_uses, bb_defs;
>>>>>>>        unsigned int i, dregno, end_dregno, sregno, end_sregno;
>>>>>>>        basic_block next_block;
>>>>>>> +  edge live_edge;
>>>>>>>
>>>>>>>        /* Look for a simple register copy.  */
>>>>>>>        set = single_set (insn);
>>>>>>> @@ -5582,17 +5589,31 @@ move_insn_for_shrink_wrap (basic_block bb,
>>>>>> rtx insn,
>>>>>>>            || overlaps_hard_reg_set_p (defs, GET_MODE (dest),
>>>>>>> dregno))
>>>>>>>          return false;
>>>>>>>
>>>>>>> -  /* See whether there is a successor block to which we could move
>>>>>>> INSN.  */
>>>>>>> -  next_block = next_block_for_reg (bb, dregno, end_dregno);
>>>>>>> -  if (!next_block)
>>>>>>> +  live_edge = next_block_for_reg (bb, dregno, end_dregno);  if
>>>>>>> + (!live_edge)
>>>>>>>           return false;
>>>>>>>
>>>>>>> +  next_block = live_edge->dest;
>>>>>>> +
>>>>>>>        /* If the destination register is referred in later insn,
>>>>>>>           try to forward it.  */
>>>>>>>        if (overlaps_hard_reg_set_p (*last_uses, GET_MODE (dest),
>>>>>>> dregno)
>>>>>>>            && !try_copy_prop (bb, insn, src, dest, last_uses))
>>>>>>>          return false;
>>>>>>>
>>>>>>> +  /* Create a new basic block on the edge.  */  if (EDGE_COUNT
>>>>>>> + (next_block->preds) == 2)
>>>>>>> +    {
>>>>>>> +      next_block = split_edge (live_edge);
>>>>>>> +
>>>>>>> +      bitmap_copy (df_get_live_in (next_block), df_get_live_out
>>>>>>> + (bb));
>>>>>> (re-sent, looks like the first send not received by the server...)
>>>>>>
>>>>>>      for the function "_IO_wdefault_xsputn" in glibc wgenops.c (.dot
>>>>>> file
>>>>>> attached)
>>>>>>
>>>>>>      174: NOTE_INSN_BASIC_BLOCK 21 are the new created basic block
>>>>>> because
>>>>>> of the sink of move instruction "ax:DI = 0" and split edge.
>>>>>>
>>>>>>      but the live_in of this BB is copied from live_out of BB 2 which
>>>>>> is too big, and
>>>>>> actually prevent the later sink of "16: r12:DI=dx:DI".
>>>>>>
>>>>>>      Should it be better to copy live_in from "next_block->next_bb"
>>>>>> instead of
>>>>>> live_out from "bb", as it will model what's needed more accurately?
>>>>> According to the algorithm, next_block->next_bb (which is
>>>>> live_edge->dest) should have two predecessors. Live_in of
>>>>> next_block->next_bb would include live_out of the other edge,  which
>>>>> is not necessary accurately. To be more accurate, you may need an
>>>>> intersection of df_get_live_out (bb) and df_get_live_in
>>>>> (next_block->next_bb).
>>>>>
>>>> Presumably we can't recompute the liveness info here as it's too
>>>> expensive to do that each time we split an edge to sink a statement?
>>>> ie, we need the more accurate liveness within this pass so that we can
>>>> sink further instructions?
>>> for a single case, x86 bootstrap, looks like these code are not compile
>>> time critical
>>> as for all 4762 live edges which could sink instructions
>>>
>>> 4139: EDGE_COUNT (next_block->preds) == 1
>>> 270: EDGE_COUNT (next_block->preds) == 2
>>> 353: EDGE_COUNT (next_block->preds) > 2
>>>
>>> and if we make the live info more accurate here, just very few
>>> additional functions (< 10)
>>> shrink-wrapped from glibc build & gcc bootstrap test.
>> Perhaps I wasn't clear.
>>
>> The whole point behind the proposed changes is to enable additional
>> sinking that is currently blocked because of the overly large set of
>> live objects.
>>
>> So my question was an attempt to understand why we didn't just a normal
>> liveness update -- I speculated that we aren't doing a normal liveness
>> update because of the compile-time cost.
>>
>>
>>
>>> So, is it OK we simply change "bitmap_copy  (df_get_live_in
>>> (next_block),  df_get_live_out  (bb));"
>>> into "bitmap_and  (df_get_live_in  (next_block),  df_get_live_out
>>> (bb),  df_get_live_in  (next_block->next_bb));" ?
>> Probably.  Though I'd be a bit concerned with next_block->next_bb.
>> Wouldn't it be safer to stash away the relevant basic block prior to the
>> call to split_edge, then use that saved copy.  Something like this
>> (untested):
>>
>> basic_block old_dest = live_edge->dest;
>> next_block = split_edge (live_edge);
>>
>> /* We create a new basic block.  Call df_grow_bb_info to make sure
>>      all data structures are allocated.  */
>> df_grow_bb_info (df_live);
>> bitmap_and (df_get_live_in (next_block),
>>               df_get_live_out (bb),
>>               df_get_live_in (old_dest));
>>
>>
>> The idea being we don't want to depend on the precise ordering blocks in
>> the block chain.
>>
>> Could you try that and see if it does what you need?
> Jeff,
>
>    Thanks, verified, it works.
Great.  Can you send an updated patchkit for review.

Thanks,

jeff
diff mbox

Patch

diff --git a/gcc/function.c b/gcc/function.c
index 764ac82..0be58e2 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -5381,7 +5381,7 @@  requires_stack_frame_p (rtx insn, HARD_REG_SET
prologue_used,
    and if BB is its only predecessor.  Return that block if so,
    otherwise return null.  */

-static basic_block
+static edge
 next_block_for_reg (basic_block bb, int regno, int end_regno)
 {