@@ -63,6 +63,10 @@ typedef struct block_info_def
bool upper_128bits_set;
/* TRUE if block has been processed. */
bool done;
+ /* TRUE if block has been scanned. */
+ bool scanned;
+ /* TRUE if 256bit AVX register isn't referenced in block. */
+ bool no_avx256;
} *block_info;
#define BLOCK_INFO(B) ((block_info) (B)->aux)
@@ -108,19 +112,23 @@ check_avx256_stores (rtx dest, const_rtx set, void *data)
static void
move_or_delete_vzeroupper_2 (basic_block bb, bool upper_128bits_set)
{
- rtx insn;
+ rtx insn, last;
rtx vzeroupper_insn = NULL_RTX;
rtx pat;
int avx256;
+ bool no_avx256 = BLOCK_INFO (bb)->no_avx256;
if (dump_file)
fprintf (dump_file, " BB [%i] entry: upper 128bits: %d\n",
bb->index, upper_128bits_set);
insn = BB_HEAD (bb);
+ last = NEXT_INSN (BB_END (bb));
while (insn != BB_END (bb))
{
insn = NEXT_INSN (insn);
+ if (insn == last)
+ break;
if (!NONDEBUG_INSN_P (insn))
continue;
@@ -176,7 +184,7 @@ move_or_delete_vzeroupper_2 (basic_block bb, bool upper_128bits_set)
vzeroupper_insn = NULL_RTX;
}
}
- else if (!upper_128bits_set)
+ else if (!upper_128bits_set && !no_avx256)
note_stores (pat, check_avx256_stores, &upper_128bits_set);
continue;
}
@@ -191,8 +199,8 @@ move_or_delete_vzeroupper_2 (basic_block bb, bool upper_128bits_set)
returns 256bit AVX register. */
upper_128bits_set = (avx256 == callee_return_avx256);
- /* Remove unnecessary vzeroupper since
- upper 128bits are cleared. */
+ /* Remove unnecessary vzeroupper since upper 128bits are
+ cleared. */
if (dump_file)
{
fprintf (dump_file, "Delete redundant vzeroupper:\n");
@@ -207,8 +215,8 @@ move_or_delete_vzeroupper_2 (basic_block bb, bool upper_128bits_set)
returns 256bit AVX register. */
upper_128bits_set = (avx256 == callee_return_pass_avx256);
- /* Must remove vzeroupper since
- callee passes in 256bit AVX register. */
+ /* Must remove vzeroupper since callee passes in 256bit
+ AVX register. */
if (dump_file)
{
fprintf (dump_file, "Delete callee pass vzeroupper:\n");
@@ -265,6 +273,109 @@ move_or_delete_vzeroupper_1 (basic_block block)
move_or_delete_vzeroupper_2 (block, upper_128bits_set);
}
+/* Helper function for scan_live_upper_128bits_1. Scan BB to check
+ if the upper 128bits of any AVX registers is live at exit of BB. */
+
+static void
+scan_live_upper_128bits_2 (basic_block bb, bool upper_128bits_set)
+{
+ rtx insn, pat;
+ int avx256;
+ bool no_avx256 = true;
+
+ if (dump_file)
+ fprintf (dump_file, " BB [%i] entry: upper 128bits: %d\n",
+ bb->index, upper_128bits_set);
+
+ FOR_BB_INSNS (bb, insn)
+ if (NONJUMP_INSN_P (insn))
+ {
+ pat = PATTERN (insn);
+
+ /* Check insn for vzeroupper intrinsic. */
+ if (GET_CODE (pat) == UNSPEC_VOLATILE
+ && XINT (pat, 1) == UNSPECV_VZEROUPPER)
+ {
+ /* Process vzeroupper intrinsic. */
+ avx256 = INTVAL (XVECEXP (pat, 0, 0));
+ if (!upper_128bits_set)
+ {
+ /* Since the upper 128bits are cleared, callee must
+ not pass 256bit AVX register. We only need to check
+ if callee returns 256bit AVX register. */
+ upper_128bits_set = (avx256 == callee_return_avx256);
+ }
+ else if (avx256 == callee_return_pass_avx256
+ || avx256 == callee_pass_avx256)
+ {
+ /* Callee passes 256bit AVX register. Check if callee
+ returns 256bit AVX register. */
+ upper_128bits_set = (avx256 == callee_return_pass_avx256);
+ }
+ else
+ upper_128bits_set = false;
+ }
+ else
+ {
+ /* Check insn for vzeroall intrinsic. */
+ if (GET_CODE (pat) == PARALLEL
+ && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
+ && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
+ upper_128bits_set = false;
+ else if (!upper_128bits_set)
+ {
+ note_stores (pat, check_avx256_stores,
+ &upper_128bits_set);
+ if (upper_128bits_set)
+ no_avx256 = false;
+ }
+ }
+ }
+
+ BLOCK_INFO (bb)->upper_128bits_set = upper_128bits_set;
+ BLOCK_INFO (bb)->no_avx256 = no_avx256;
+
+ if (dump_file)
+ fprintf (dump_file, " BB [%i] exit: upper 128bits: %d\n",
+ bb->index, upper_128bits_set);
+}
+
+/* Helper function for move_or_delete_vzeroupper. Scan BLOCK and its
+ predecessor blocks recursively to check if the upper 128bits of any
+ AVX registers is live at exit of BLOCK. */
+
+static void
+scan_live_upper_128bits_1 (basic_block block)
+{
+ edge e;
+ edge_iterator ei;
+ bool upper_128bits_set;
+
+ if (dump_file)
+ fprintf (dump_file, " Scan BB [%i]: status: %d\n",
+ block->index, BLOCK_INFO (block)->scanned);
+
+ if (BLOCK_INFO (block)->scanned)
+ return;
+
+ BLOCK_INFO (block)->scanned = true;
+
+ upper_128bits_set = false;
+
+ /* Process all predecessor edges of this block. */
+ FOR_EACH_EDGE (e, ei, block->preds)
+ {
+ if (e->src == block)
+ continue;
+ scan_live_upper_128bits_1 (e->src);
+ if (BLOCK_INFO (e->src)->upper_128bits_set)
+ upper_128bits_set = true;
+ }
+
+ /* Scan this block. */
+ scan_live_upper_128bits_2 (block, upper_128bits_set);
+}
+
/* Go through the instruction stream looking for vzeroupper. Delete
it if upper 128bit AVX registers are unused. If it isn't deleted,
move it to just before a jump insn. */
@@ -287,8 +398,16 @@ move_or_delete_vzeroupper (void)
move_or_delete_vzeroupper_2 (e->dest,
cfun->machine->caller_pass_avx256_p);
BLOCK_INFO (e->dest)->done = true;
+ BLOCK_INFO (e->dest)->scanned = true;
}
+ /* Scan predecessor blocks of all exit points. */
+ if (dump_file)
+ fprintf (dump_file, "Scan all exit points\n");
+
+ FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
+ scan_live_upper_128bits_1 (e->src);
+
/* Process predecessor blocks of all exit points. */
if (dump_file)
fprintf (dump_file, "Process all exit points\n");
@@ -4062,17 +4181,6 @@ ix86_option_override_internal (bool main_args_p)
}
}
-/* Return TRUE if type TYPE and mode MODE use 256bit AVX modes. */
-
-static bool
-use_avx256_p (enum machine_mode mode, const_tree type)
-{
- return (VALID_AVX256_REG_MODE (mode)
- || (type
- && TREE_CODE (type) == VECTOR_TYPE
- && int_size_in_bytes (type) == 32));
-}
-
/* Return TRUE if VAL is passed in register with 256bit AVX modes. */
static bool
@@ -5687,7 +5795,6 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
if (function_pass_avx256_p (fnret_value))
{
/* The return value of this function uses 256bit AVX modes. */
- cfun->machine->use_avx256_p = true;
if (caller)
cfun->machine->callee_return_avx256_p = true;
else
@@ -6956,7 +7063,6 @@ ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
{
/* This argument uses 256bit AVX modes. */
- cfun->machine->use_avx256_p = true;
if (cum->caller)
cfun->machine->callee_pass_avx256_p = true;
else
@@ -10970,7 +11076,6 @@ ix86_expand_epilogue (int style)
/* Emit vzeroupper if needed. */
if (TARGET_VZEROUPPER
- && cfun->machine->use_avx256_p
&& !cfun->machine->caller_return_avx256_p)
{
cfun->machine->use_vzeroupper_p = 1;
@@ -15130,9 +15235,6 @@ ix86_expand_move (enum machine_mode mode, rtx operands[])
rtx op0, op1;
enum tls_model model;
- if (VALID_AVX256_REG_MODE (mode))
- cfun->machine->use_avx256_p = true;
-
op0 = operands[0];
op1 = operands[1];
@@ -15277,9 +15379,6 @@ ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
rtx op0 = operands[0], op1 = operands[1];
unsigned int align = GET_MODE_ALIGNMENT (mode);
- if (VALID_AVX256_REG_MODE (mode))
- cfun->machine->use_avx256_p = true;
-
/* Force constants other than zero into memory. We do not know how
the instructions used to build constants modify the upper 64 bits
of the register, once we have that information we may be able
@@ -15386,9 +15485,6 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
rtx op0, op1, m;
- if (VALID_AVX256_REG_MODE (mode))
- cfun->machine->use_avx256_p = true;
-
op0 = operands[0];
op1 = operands[1];
@@ -21661,7 +21757,7 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
}
/* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
- if (TARGET_VZEROUPPER && cfun->machine->use_avx256_p)
+ if (TARGET_VZEROUPPER)
{
rtx unspec;
int avx256;
@@ -22763,9 +22859,6 @@ ix86_local_alignment (tree exp, enum machine_mode mode,
decl = NULL;
}
- if (use_avx256_p (mode, type))
- cfun->machine->use_avx256_p = true;
-
/* Don't do dynamic stack realignment for long long objects with
-mpreferred-stack-boundary=2. */
if (!TARGET_64BIT
@@ -22872,9 +22965,6 @@ ix86_minimum_alignment (tree exp, enum machine_mode mode,
decl = NULL;
}
- if (use_avx256_p (mode, type))
- cfun->machine->use_avx256_p = true;
-
if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
return align;
@@ -2299,9 +2299,6 @@ struct GTY(()) machine_function {
/* Nonzero if the current function uses vzeroupper. */
BOOL_BITFIELD use_vzeroupper_p : 1;
- /* Nonzero if the current function uses 256bit AVX regisers. */
- BOOL_BITFIELD use_avx256_p : 1;
-
/* Nonzero if caller passes 256bit AVX modes. */
BOOL_BITFIELD caller_pass_avx256_p : 1;
@@ -14,4 +14,4 @@ foo ()
_mm256_zeroupper ();
}
-/* { dg-final { scan-assembler-times "avx_vzeroupper" 3 } } */
+/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
@@ -16,4 +16,4 @@ foo ()
}
/* { dg-final { scan-assembler-times "\\*avx_vzeroall" 1 } } */
-/* { dg-final { scan-assembler-times "avx_vzeroupper" 3 } } */
+/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx -mtune=generic -dp" } */
+
+extern void free (void *);
+void
+bar (void *ncstrp)
+{
+ if(ncstrp==((void *)0))
+ return;
+ free(ncstrp);
+}
+
+/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */