@@ -56,6 +56,8 @@ along with GCC; see the file COPYING3. If not see
#include "debug.h"
#include "dwarf2out.h"
#include "sched-int.h"
+#include "sbitmap.h"
+#include "fibheap.h"
enum upper_128bits_state
{
@@ -73,6 +75,10 @@ typedef struct block_info_def
bool unchanged;
/* TRUE if block has been processed. */
bool processed;
+ /* TRUE if block has been scanned. */
+ bool scanned;
+ /* Previous state of the upper 128bits of AVX registers at entry. */
+ enum upper_128bits_state prev;
} *block_info;
#define BLOCK_INFO(B) ((block_info) (B)->aux)
@@ -135,6 +141,16 @@ move_or_delete_vzeroupper_2 (basic_block bb,
return;
}
+ if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
+ {
+ if (dump_file)
+ fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
+ bb->index, BLOCK_INFO (bb)->state);
+ return;
+ }
+
+ BLOCK_INFO (bb)->prev = state;
+
if (dump_file)
fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
bb->index, state);
@@ -264,6 +280,7 @@ move_or_delete_vzeroupper_2 (basic_block bb,
BLOCK_INFO (bb)->state = state;
BLOCK_INFO (bb)->unchanged = unchanged;
+ BLOCK_INFO (bb)->scanned = true;
if (dump_file)
fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
@@ -273,9 +290,10 @@ move_or_delete_vzeroupper_2 (basic_block bb,
/* Helper function for move_or_delete_vzeroupper. Process vzeroupper
in BLOCK and check its predecessor blocks. Treat UNKNOWN state
- as USED if UNKNOWN_IS_UNUSED is true. */
+ as USED if UNKNOWN_IS_UNUSED is true. Return TRUE if the exit
+ state is changed. */
-static void
+static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
edge e;
@@ -288,7 +306,7 @@ move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
block->index, BLOCK_INFO (block)->processed);
if (BLOCK_INFO (block)->processed)
- return;
+ return false;
state = unused;
@@ -324,8 +342,14 @@ done:
/* Need to rescan if the upper 128bits of AVX registers are changed
to USED at exit. */
- if (new_state != old_state && new_state == used)
- cfun->machine->rescan_vzeroupper_p = 1;
+ if (new_state != old_state)
+ {
+ if (new_state == used)
+ cfun->machine->rescan_vzeroupper_p = 1;
+ return true;
+ }
+ else
+ return false;
}
/* Go through the instruction stream looking for vzeroupper. Delete
@@ -338,14 +362,18 @@ move_or_delete_vzeroupper (void)
edge e;
edge_iterator ei;
basic_block bb;
- unsigned int count;
+ fibheap_t worklist, pending, fibheap_swap;
+ sbitmap visited, in_worklist, in_pending, sbitmap_swap;
+ int *bb_order;
+ int *rc_order;
+ int i;
/* Set up block info for each basic block. */
alloc_aux_for_blocks (sizeof (struct block_info_def));
- /* Process successor blocks of all entry points. */
+ /* Process outgoing edges of entry point. */
if (dump_file)
- fprintf (dump_file, "Process all entry points\n");
+ fprintf (dump_file, "Process outgoing edges of entry point\n");
FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
{
@@ -355,25 +383,102 @@ move_or_delete_vzeroupper (void)
BLOCK_INFO (e->dest)->processed = true;
}
- /* Process all basic blocks. */
- count = 0;
- do
+ /* Compute reverse completion order of depth first search of the CFG
+ so that the data-flow runs faster. */
+ rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
+ bb_order = XNEWVEC (int, last_basic_block);
+ pre_and_rev_post_order_compute (NULL, rc_order, false);
+ for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
+ bb_order[rc_order[i]] = i;
+ free (rc_order);
+
+ worklist = fibheap_new ();
+ pending = fibheap_new ();
+ visited = sbitmap_alloc (last_basic_block);
+ in_worklist = sbitmap_alloc (last_basic_block);
+ in_pending = sbitmap_alloc (last_basic_block);
+ sbitmap_zero (in_worklist);
+
+ /* Don't check outgoing edges of entry point. */
+ sbitmap_ones (in_pending);
+ FOR_EACH_BB (bb)
+ if (BLOCK_INFO (bb)->processed)
+ RESET_BIT (in_pending, bb->index);
+ else
+ {
+ move_or_delete_vzeroupper_1 (bb, false);
+ fibheap_insert (pending, bb_order[bb->index], bb);
+ }
+
+ if (dump_file)
+ fprintf (dump_file, "Check remaining basic blocks\n");
+
+ while (!fibheap_empty (pending))
{
- if (dump_file)
- fprintf (dump_file, "Process all basic blocks: trip %d\n",
- count);
+ fibheap_swap = pending;
+ pending = worklist;
+ worklist = fibheap_swap;
+ sbitmap_swap = in_pending;
+ in_pending = in_worklist;
+ in_worklist = sbitmap_swap;
+
+ sbitmap_zero (visited);
+
cfun->machine->rescan_vzeroupper_p = 0;
- FOR_EACH_BB (bb)
- move_or_delete_vzeroupper_1 (bb, false);
+
+ while (!fibheap_empty (worklist))
+ {
+ bb = (basic_block) fibheap_extract_min (worklist);
+ RESET_BIT (in_worklist, bb->index);
+ gcc_assert (!TEST_BIT (visited, bb->index));
+ if (!TEST_BIT (visited, bb->index))
+ {
+ edge_iterator ei;
+
+ SET_BIT (visited, bb->index);
+
+ if (move_or_delete_vzeroupper_1 (bb, false))
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ if (e->dest == EXIT_BLOCK_PTR
+ || BLOCK_INFO (e->dest)->processed)
+ continue;
+
+ if (TEST_BIT (visited, e->dest->index))
+ {
+ if (!TEST_BIT (in_pending, e->dest->index))
+ {
+ /* Send E->DEST to next round. */
+ SET_BIT (in_pending, e->dest->index);
+ fibheap_insert (pending,
+ bb_order[e->dest->index],
+ e->dest);
+ }
+ }
+ else if (!TEST_BIT (in_worklist, e->dest->index))
+ {
+ /* Add E->DEST to current round. */
+ SET_BIT (in_worklist, e->dest->index);
+ fibheap_insert (worklist, bb_order[e->dest->index],
+ e->dest);
+ }
+ }
+ }
+ }
+
+ if (!cfun->machine->rescan_vzeroupper_p)
+ break;
}
- while (cfun->machine->rescan_vzeroupper_p && count++ < 20);
- /* FIXME: Is 20 big enough? */
- if (count >= 20)
- gcc_unreachable ();
+ free (bb_order);
+ fibheap_delete (worklist);
+ fibheap_delete (pending);
+ sbitmap_free (visited);
+ sbitmap_free (in_worklist);
+ sbitmap_free (in_pending);
if (dump_file)
- fprintf (dump_file, "Process all basic blocks\n");
+ fprintf (dump_file, "Process remaining basic blocks\n");
FOR_EACH_BB (bb)
move_or_delete_vzeroupper_1 (bb, true);
@@ -23,7 +23,7 @@ i386.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
$(RECOG_H) $(EXPR_H) $(OPTABS_H) toplev.h $(BASIC_BLOCK_H) \
$(GGC_H) $(TARGET_H) $(TARGET_DEF_H) langhooks.h $(CGRAPH_H) \
$(TREE_GIMPLE_H) $(DWARF2_H) $(DF_H) tm-constrs.h $(PARAMS_H) \
- i386-builtin-types.inc debug.h dwarf2out.h
+ i386-builtin-types.inc debug.h dwarf2out.h sbitmap.h $(FIBHEAP_H)
i386-c.o: $(srcdir)/config/i386/i386-c.c \
$(srcdir)/config/i386/i386-protos.h $(CONFIG_H) $(SYSTEM_H) coretypes.h \
new file mode 100644
@@ -0,0 +1,115 @@
+2010-12-17 H.J. Lu <hongjiu.lu@intel.com>
+
+ * gfortran.dg/pr46519-2.f90: New.
+
+2010-11-24 H.J. Lu <hongjiu.lu@intel.com>
+
+ * gfortran.dg/pr46519-1.f: Replace -mtune=generic with
+ -mvzeroupper.
+
+2010-11-24 H.J. Lu <hongjiu.lu@intel.com>
+
+ * gfortran.dg/pr46519-1.f: New.
+
+2010-11-20 H.J. Lu <hongjiu.lu@intel.com>
+
+ * gcc.target/i386/avx-vzeroupper-14.c: Replace -O0 with -O2.
+ * gcc.target/i386/avx-vzeroupper-15.c: Likewise.
+ * gcc.target/i386/avx-vzeroupper-16.c: Likewise.
+ * gcc.target/i386/avx-vzeroupper-17.c: Likewise.
+
+ * gcc.target/i386/avx-vzeroupper-25.c: New.
+ * gcc.target/i386/avx-vzeroupper-26.c: Likewise.
+
+2010-11-18 H.J. Lu <hongjiu.lu@intel.com>
+
+ * gcc.target/i386/avx-vzeroupper-24.c: New.
+
+2010-11-18 H.J. Lu <hongjiu.lu@intel.com>
+
+ * gcc.target/i386/avx-vzeroupper-21.c: New.
+ * gcc.target/i386/avx-vzeroupper-22.c: Likewise.
+ * gcc.target/i386/avx-vzeroupper-23.c: Likewise.
+
+2010-11-17 H.J. Lu <hongjiu.lu@intel.com>
+
+ * gcc.target/i386/avx-vzeroupper-10.c: Expect no avx_vzeroupper.
+ * gcc.target/i386/avx-vzeroupper-11.c: Likewise.
+
+2010-11-16 H.J. Lu <hongjiu.lu@intel.com>
+
+ * gcc.target/i386/avx-vzeroupper-20.c: New.
+
+2010-11-04 H.J. Lu <hongjiu.lu@intel.com>
+
+ * gcc.target/i386/avx-vzeroupper-19.c: New.
+
+2010-11-03 H.J. Lu <hongjiu.lu@intel.com>
+
+ PR target/46295
+ * gcc.target/i386/pr46295.c: New.
+
+2010-11-03 H.J. Lu <hongjiu.lu@intel.com>
+
+ * gcc.target/i386/pr46285.c: Remove -dp.
+
+2010-11-02 H.J. Lu <hongjiu.lu@intel.com>
+
+ PR target/46285
+ * gcc.target/i386/pr46285.c.
+
+2010-11-02 H.J. Lu <hongjiu.lu@intel.com>
+
+ PR target/46253
+ * gcc.target/i386/pr46253.c: New.
+
+2010-11-02 H.J. Lu <hongjiu.lu@intel.com>
+
+ * gcc.target/i386/avx-vzeroupper-16.c: New.
+ * gcc.target/i386/avx-vzeroupper-17.c: Likewise.
+ * gcc.target/i386/avx-vzeroupper-18.c: Likewise.
+
+2010-11-02 H.J. Lu <hongjiu.lu@intel.com>
+
+ * gcc.target/i386/avx-vzeroupper-15.c: New.
+
+2010-10-26 H.J. Lu <hongjiu.lu@intel.com>
+
+ * gcc.target/i386/avx-vzeroupper-4.c: Don't scan
+ avx_vzeroupper_nop. Scan avx_vzeroupper instead of
+ *avx_vzeroupper.
+ * gcc.target/i386/avx-vzeroupper-10.c: Likewise.
+ * gcc.target/i386/avx-vzeroupper-11.c: Likewise.
+ * gcc.target/i386/avx-vzeroupper-12.c: Likewise.
+ * gcc.target/i386/avx-vzeroupper-13.c: Likewise.
+ * gcc.target/i386/avx-vzeroupper-14.c: Likewise.
+
+2010-10-19 H.J. Lu <hongjiu.lu@intel.com>
+
+ * gcc.target/i386/avx-vzeroupper-1.c: Add -mtune=generic.
+ * gcc.target/i386/avx-vzeroupper-1.c: Likewise.
+
+ * gcc.target/i386/avx-vzeroupper-13.c: New.
+ * gcc.target/i386/avx-vzeroupper-14.c: Likewise.
+
+2010-10-12 H.J. Lu <hongjiu.lu@intel.com>
+
+ * gcc.target/i386/avx-vzeroupper-9.c: New.
+ * gcc.target/i386/avx-vzeroupper-10.c: Likewise.
+ * gcc.target/i386/avx-vzeroupper-11.c: Likewise.
+ * gcc.target/i386/avx-vzeroupper-12.c: Likewise.
+
+2010-10-12 H.J. Lu <hongjiu.lu@intel.com>
+
+ * gcc.target/i386/avx-vzeroupper-5.c: New.
+ * gcc.target/i386/avx-vzeroupper-6.c: Likewise.
+ * gcc.target/i386/avx-vzeroupper-7.c: Likewise.
+ * gcc.target/i386/avx-vzeroupper-8.c: Likewise.
+
+2010-10-12 H.J. Lu <hongjiu.lu@intel.com>
+
+ * gcc.target/i386/avx-vzeroupper-4.c: New.
+
+2010-06-15 H.J. Lu <hongjiu.lu@intel.com>
+
+ * gcc.target/i386/avx-vzeroupper-3.c: New.