From 9ea451aef0f1f2fb0a36a7b718f910cfe285541d Mon Sep 17 00:00:00 2001
From: Dominik Vogt <vogt@linux.vnet.ibm.com>
Date: Fri, 29 Apr 2016 08:36:59 +0100
Subject: [PATCH] Drop excess size used for run time allocated stack
variables.
The present calculation sometimes led to more stack memory being used than
necessary with alloca. First, (STACK_BOUNDARY - 1) would be added to the
allocated size:
size = plus_constant (Pmode, size, extra);
size = force_operand (size, NULL_RTX);
Then round_push was called and added another (STACK_BOUNDARY - 1) before
rounding down to a multiple of STACK_BOUNDARY. On s390x this resulted in
adding 14 bytes before rounding down for "x" in the test case pr36728-1.c.
round_push() now takes an argument to inform it about what has already been
added to size.
---
gcc/explow.c | 33 ++++++++++++++++++++-------------
1 file changed, 20 insertions(+), 13 deletions(-)
@@ -949,24 +949,30 @@ anti_adjust_stack (rtx adjust)
}
/* Round the size of a block to be pushed up to the boundary required
- by this machine. SIZE is the desired size, which need not be constant. */
+ by this machine. SIZE is the desired size, which need not be constant.
+ ALREADY_ADDED is the number of units that have already been added to SIZE for
+ other alignment reasons.
+*/
static rtx
-round_push (rtx size)
+round_push (rtx size, int already_added)
{
- rtx align_rtx, alignm1_rtx;
+ rtx align_rtx, add_rtx;
if (!SUPPORTS_STACK_ALIGNMENT
|| crtl->preferred_stack_boundary == MAX_SUPPORTED_STACK_ALIGNMENT)
{
int align = crtl->preferred_stack_boundary / BITS_PER_UNIT;
+ int add;
if (align == 1)
return size;
+ add = (align > already_added) ? align - already_added - 1 : 0;
+
if (CONST_INT_P (size))
{
- HOST_WIDE_INT new_size = (INTVAL (size) + align - 1) / align * align;
+ HOST_WIDE_INT new_size = (INTVAL (size) + add) / align * align;
if (INTVAL (size) != new_size)
size = GEN_INT (new_size);
@@ -974,7 +980,7 @@ round_push (rtx size)
}
align_rtx = GEN_INT (align);
- alignm1_rtx = GEN_INT (align - 1);
+ add_rtx = (add > 0) ? GEN_INT (add) : const0_rtx;
}
else
{
@@ -983,15 +989,15 @@ round_push (rtx size)
substituted by the right value in vregs pass and optimized
during combine. */
align_rtx = virtual_preferred_stack_boundary_rtx;
- alignm1_rtx = force_operand (plus_constant (Pmode, align_rtx, -1),
- NULL_RTX);
+ add_rtx = force_operand (plus_constant (Pmode, align_rtx, -1), NULL_RTX);
}
/* CEIL_DIV_EXPR needs to worry about the addition overflowing,
but we know it can't. So add ourselves and then do
TRUNC_DIV_EXPR. */
- size = expand_binop (Pmode, add_optab, size, alignm1_rtx,
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ if (add_rtx != const0_rtx)
+ size = expand_binop (Pmode, add_optab, size, add_rtx,
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
size = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, size, align_rtx,
NULL_RTX, 1);
size = expand_mult (Pmode, size, align_rtx, NULL_RTX, 1);
@@ -1175,6 +1181,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
rtx_code_label *final_label;
rtx final_target, target;
unsigned extra_align = 0;
+ unsigned extra = 0;
bool must_align;
/* If we're asking for zero bytes, it doesn't matter what we point
@@ -1275,9 +1282,9 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
extra_align = BITS_PER_UNIT;
#endif
- if (must_align)
+ if (must_align && required_align > extra_align)
{
- unsigned extra = (required_align - extra_align) / BITS_PER_UNIT;
+ extra = (required_align - extra_align) / BITS_PER_UNIT;
size = plus_constant (Pmode, size, extra);
size = force_operand (size, NULL_RTX);
@@ -1285,7 +1292,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
if (flag_stack_usage_info)
stack_usage_size += extra;
- if (extra && size_align > extra_align)
+ if (size_align > extra_align)
size_align = extra_align;
}
@@ -1304,7 +1311,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
momentarily mis-aligning the stack. */
if (size_align % MAX_SUPPORTED_STACK_ALIGNMENT != 0)
{
- size = round_push (size);
+ size = round_push (size, extra);
if (flag_stack_usage_info)
{
--
2.3.0