@@ -68,14 +68,8 @@ typedef struct block_info_def
{
/* State of the upper 128bits of any AVX registers at exit. */
enum upper_128bits_state state;
- /* If the upper 128bits of any AVX registers are referenced. */
- enum upper_128bits_state referenced;
- /* Number of vzerouppers in this block. */
- unsigned int count;
/* TRUE if block has been processed. */
bool processed;
- /* TRUE if block has been rescanned. */
- bool rescanned;
} *block_info;
#define BLOCK_INFO(B) ((block_info) (B)->aux)
@@ -127,8 +121,6 @@ move_or_delete_vzeroupper_2 (basic_block bb,
rtx vzeroupper_insn = NULL_RTX;
rtx pat;
int avx256;
- enum upper_128bits_state referenced = BLOCK_INFO (bb)->referenced;
- int count = BLOCK_INFO (bb)->count;
if (dump_file)
fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
@@ -191,24 +183,20 @@ move_or_delete_vzeroupper_2 (basic_block bb,
/* Delete pending vzeroupper insertion. */
if (vzeroupper_insn)
{
- count--;
delete_insn (vzeroupper_insn);
vzeroupper_insn = NULL_RTX;
}
}
- else if (state != used && referenced != unused)
+ else if (state != used)
{
/* No need to call note_stores if the upper 128bits of
AVX registers are never referenced. */
note_stores (pat, check_avx256_stores, &state);
- if (state == used)
- referenced = used;
}
continue;
}
/* Process vzeroupper intrinsic. */
- count++;
avx256 = INTVAL (XVECEXP (pat, 0, 0));
if (state == unused)
@@ -226,7 +214,6 @@ move_or_delete_vzeroupper_2 (basic_block bb,
fprintf (dump_file, "Delete redundant vzeroupper:\n");
print_rtl_single (dump_file, insn);
}
- count--;
delete_insn (insn);
}
else
@@ -246,7 +233,6 @@ move_or_delete_vzeroupper_2 (basic_block bb,
fprintf (dump_file, "Delete callee pass vzeroupper:\n");
print_rtl_single (dump_file, insn);
}
- count--;
delete_insn (insn);
}
else
@@ -256,30 +242,22 @@ move_or_delete_vzeroupper_2 (basic_block bb,
BLOCK_INFO (bb)->state = state;
- if (BLOCK_INFO (bb)->referenced == unknown)
- {
- /* The upper 128bits of AVX registers are never referenced if
- REFERENCED isn't updated. */
- if (referenced == unknown)
- referenced = unused;
- BLOCK_INFO (bb)->referenced = referenced;
- BLOCK_INFO (bb)->count = count;
- }
-
if (dump_file)
fprintf (dump_file, " [bb %i] exit: upper 128bits: %d\n",
bb->index, state);
}
/* Helper function for move_or_delete_vzeroupper. Process vzeroupper
- in BLOCK and its predecessor blocks recursively. */
+ in BLOCK and check its predecessor blocks. Treat UNKNOWN state
+ as USED if UNKNOWN_IS_UNUSED is true. */
static void
-move_or_delete_vzeroupper_1 (basic_block block)
+move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
edge e;
edge_iterator ei;
- enum upper_128bits_state state;
+ enum upper_128bits_state state, old_state, new_state;
+ bool seen_unknown;
if (dump_file)
fprintf (dump_file, " Process [bb %i]: status: %d\n",
@@ -288,83 +266,42 @@ move_or_delete_vzeroupper_1 (basic_block block)
if (BLOCK_INFO (block)->processed)
return;
- BLOCK_INFO (block)->processed = true;
-
- state = unknown;
+ state = unused;
- /* Process all predecessor edges of this block. */
+ /* Check all predecessor edges of this block. */
+ seen_unknown = false;
FOR_EACH_EDGE (e, ei, block->preds)
{
if (e->src == block)
continue;
- move_or_delete_vzeroupper_1 (e->src);
switch (BLOCK_INFO (e->src)->state)
{
case unknown:
- if (state == unused)
- state = unknown;
+ if (!unknown_is_unused)
+ seen_unknown = true;
+ case unused:
break;
case used:
state = used;
- break;
- case unused:
- break;
+ goto done;
}
}
- /* If state of any predecessor edges is unknown, we need to rescan. */
- if (state == unknown)
- cfun->machine->rescan_vzeroupper_p = 1;
+ if (seen_unknown)
+ state = unknown;
- /* Process this block. */
+done:
+ old_state = BLOCK_INFO (block)->state;
move_or_delete_vzeroupper_2 (block, state);
-}
-
-/* Helper function for move_or_delete_vzeroupper. Rescan vzeroupper
- in BLOCK and its predecessor blocks recursively. */
-
-static void
-rescan_move_or_delete_vzeroupper (basic_block block)
-{
- edge e;
- edge_iterator ei;
- enum upper_128bits_state state;
-
- if (dump_file)
- fprintf (dump_file, " Rescan [bb %i]: status: %d\n",
- block->index, BLOCK_INFO (block)->rescanned);
-
- if (BLOCK_INFO (block)->rescanned)
- return;
-
- BLOCK_INFO (block)->rescanned = true;
-
- state = unused;
+ new_state = BLOCK_INFO (block)->state;
- /* Rescan all predecessor edges of this block. */
- FOR_EACH_EDGE (e, ei, block->preds)
- {
- if (e->src == block)
- continue;
- rescan_move_or_delete_vzeroupper (e->src);
- /* For rescan, UKKNOWN state is treated as UNUSED. */
- if (BLOCK_INFO (e->src)->state == used)
- state = used;
- }
+ if (state != unknown || new_state == used)
+ BLOCK_INFO (block)->processed = true;
- /* Rescan this block only if there are vzerouppers or the upper
- 128bits of AVX registers are referenced. */
- if (BLOCK_INFO (block)->count == 0
- && (state == used || BLOCK_INFO (block)->referenced != used))
- {
- if (state == used)
- BLOCK_INFO (block)->state = state;
- if (dump_file)
- fprintf (dump_file, " [bb %i] exit: upper 128bits: %d\n",
- block->index, BLOCK_INFO (block)->state);
- }
- else
- move_or_delete_vzeroupper_2 (block, state);
+ /* Need to rescan if the upper 128bits of AVX registers are changed
+ to USED at exit. */
+ if (new_state != old_state && new_state == used)
+ cfun->machine->rescan_vzeroupper_p = 1;
}
/* Go through the instruction stream looking for vzeroupper. Delete
@@ -377,7 +314,7 @@ move_or_delete_vzeroupper (void)
edge e;
edge_iterator ei;
basic_block bb;
- unsigned int count = 0;
+ unsigned int count;
/* Set up block info for each basic block. */
alloc_aux_for_blocks (sizeof (struct block_info_def));
@@ -392,28 +329,30 @@ move_or_delete_vzeroupper (void)
cfun->machine->caller_pass_avx256_p
? used : unused);
BLOCK_INFO (e->dest)->processed = true;
- BLOCK_INFO (e->dest)->rescanned = true;
}
/* Process all basic blocks. */
- if (dump_file)
- fprintf (dump_file, "Process all basic blocks\n");
-
- FOR_EACH_BB (bb)
- {
- move_or_delete_vzeroupper_1 (bb);
- count += BLOCK_INFO (bb)->count;
- }
-
- /* Rescan all basic blocks if needed. */
- if (count && cfun->machine->rescan_vzeroupper_p)
+ count = 0;
+ do
{
if (dump_file)
- fprintf (dump_file, "Rescan all basic blocks\n");
-
+ fprintf (dump_file, "Process all basic blocks: trip %d\n",
+ count);
+ cfun->machine->rescan_vzeroupper_p = 0;
FOR_EACH_BB (bb)
- rescan_move_or_delete_vzeroupper (bb);
+ move_or_delete_vzeroupper_1 (bb, false);
}
+ while (cfun->machine->rescan_vzeroupper_p && count++ < 20);
+
+ /* FIXME: Is 20 big enough? */
+ if (count >= 20)
+ gcc_unreachable ();
+
+ if (dump_file)
+ fprintf (dump_file, "Process all basic blocks\n");
+
+ FOR_EACH_BB (bb)
+ move_or_delete_vzeroupper_1 (bb, true);
free_aux_for_blocks ();
}
new file mode 100644
@@ -0,0 +1,31 @@
+! { dg-do compile { target i?86-*-* x86_64-*-* } }
+! { dg-options "-O3 -mavx -mvzeroupper -mtune=generic -dp" }
+
+ SUBROUTINE func(kts, kte, qrz, qiz, rho)
+ IMPLICIT NONE
+ INTEGER, INTENT(IN) :: kts, kte
+ REAL, DIMENSION(kts:kte), INTENT(INOUT) :: qrz, qiz, rho
+ INTEGER :: k
+ REAL, DIMENSION(kts:kte) :: praci, vtiold
+ REAL :: fluxout
+ INTEGER :: min_q, max_q, var
+ do k=kts,kte
+ praci(k)=0.0
+ enddo
+ min_q=kte
+ max_q=kts-1
+ DO var=1,20
+ do k=max_q,min_q,-1
+ fluxout=rho(k)*qrz(k)
+ enddo
+ qrz(min_q-1)=qrz(min_q-1)+fluxout
+ ENDDO
+ DO var=1,20
+ do k=kts,kte-1
+ vtiold(k)= (rho(k))**0.16
+ enddo
+ ENDDO
+ STOP
+ END SUBROUTINE func
+
+! { dg-final { scan-assembler "avx_vzeroupper" } }