===================================================================
@@ -549,6 +549,7 @@ __generic_morestack (size_t *pframe_size
char *to;
void *ret;
size_t i;
+ size_t aligned;
current = __morestack_current_segment;
@@ -580,15 +581,19 @@ __generic_morestack (size_t *pframe_size
*pframe_size = current->size - param_size;
+ /* Align the returned stack to a 32-byte boundary. */
+ aligned = (param_size + 31) & ~ (size_t) 31;
+
#ifdef STACK_GROWS_DOWNWARD
{
char *bottom = (char *) (current + 1) + current->size;
- to = bottom - param_size;
- ret = bottom - param_size;
+ to = bottom - aligned;
+ ret = bottom - aligned;
}
#else
to = current + 1;
- ret = (char *) (current + 1) + param_size;
+ to += aligned - param_size;
+ ret = (char *) (current + 1) + aligned;
#endif
/* We don't call memcpy to avoid worrying about the dynamic linker
===================================================================
@@ -200,18 +200,19 @@ __morestack_non_split:
jb 2f # Get more space if we need it.
- # This breaks call/return prediction, as described above.
- incq 8(%rsp) # Increment the return address.
-
# If the instruction that we return to is
# leaq 24(%rbp), %r11n
# then we have been called by a varargs function that expects
# %ebp to hold a real value. That can only work if we do the
# full stack split routine. FIXME: This is fragile.
movq 8(%rsp),%rax
+ incq %rax # Skip ret instruction in caller.
cmpl $0x185d8d4c,(%rax)
je 2f
+ # This breaks call/return prediction, as described above.
+ incq 8(%rsp) # Increment the return address.
+
popq %rax # Restore register.
.cfi_adjust_cfa_offset -8 # Adjust for popped register.
@@ -296,9 +297,13 @@ __morestack:
# argument size is pushed then the new stack frame size is
# pushed.
- # Align stack to 16-byte boundary with enough space for saving
- # registers and passing parameters to functions we call.
- subl $40,%esp
+ # In the body of a non-leaf function, the stack pointer will
+ # be aligned to a 16-byte boundary. That is CFA + 12 in the
+ # stack picture above: (CFA + 12) % 16 == 0. At this point we
+ # have %esp == CFA - 8, so %esp % 16 == 12. We need some
+ # space for saving registers and passing parameters, and we
+ # need to wind up with %esp % 16 == 0.
+ subl $44,%esp
# Because our cleanup code may need to clobber %ebx, we need
# to save it here so the unwinder can restore the value used
@@ -393,13 +398,15 @@ __morestack:
movl %ebp,%esp # Restore stack pointer.
+ # As before, we now have %esp % 16 == 12.
+
pushl %eax # Push return value on old stack.
pushl %edx
- subl $8,%esp # Align stack to 16-byte boundary.
+ subl $4,%esp # Align stack to 16-byte boundary.
call __morestack_unblock_signals
- addl $8,%esp
+ addl $4,%esp
popl %edx # Restore return value.
popl %eax
@@ -485,15 +492,21 @@ __morestack:
pushq %r9
pushq %r11
- pushq $0 # For alignment.
+
+ # We entered morestack with the stack pointer aligned to a
+ # 16-byte boundary (the call to morestack's caller used 8
+ # bytes, and the call to morestack used 8 bytes). We have now
+ # pushed 10 registers, so we are still aligned to a 16-byte
+ # boundary.
call __morestack_block_signals
leaq -8(%rbp),%rdi # Address of new frame size.
leaq 24(%rbp),%rsi # The caller's parameters.
- addq $8,%rsp
popq %rdx # The size of the parameters.
+ subq $8,%rsp # Align stack.
+
call __generic_morestack
movq -8(%rbp),%r10 # Reload modified frame size
@@ -564,6 +577,9 @@ __morestack:
movq %rbp,%rsp # Restore stack pointer.
+ # Now (%rsp & 15) == 8.
+
+ subq $8,%rsp # For alignment.
pushq %rax # Push return value on old stack.
pushq %rdx
@@ -571,6 +587,7 @@ __morestack:
popq %rdx # Restore return value.
popq %rax
+ addq $8,%rsp
.cfi_remember_state
popq %rbp