@@ -7062,11 +7062,8 @@ static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
rtx save_area, mem;
- rtx label;
- rtx tmp_reg;
- rtx nsse_reg;
alias_set_type set;
- int i;
+ int i, max;
/* GPR size of varargs save area. */
if (cfun->va_list_gpr_size)
@@ -7087,10 +7084,11 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
save_area = frame_pointer_rtx;
set = get_varargs_alias_set ();
- for (i = cum->regno;
- i < X86_64_REGPARM_MAX
- && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
- i++)
+ max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
+ if (max > X86_64_REGPARM_MAX)
+ max = X86_64_REGPARM_MAX;
+
+ for (i = cum->regno; i < max; i++)
{
mem = gen_rtx_MEM (Pmode,
plus_constant (save_area, i * UNITS_PER_WORD));
@@ -7102,33 +7100,41 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
if (ix86_varargs_fpr_size)
{
+ enum machine_mode smode;
+ rtx label, test;
+
     /* Now emit code to save SSE registers.  The AX parameter contains the number
- of SSE parameter registers used to call this function. We use
- sse_prologue_save insn template that produces computed jump across
- SSE saves. We need some preparation work to get this working. */
+ of SSE parameter registers used to call this function, though all we
+ actually check here is the zero/non-zero status. */
label = gen_label_rtx ();
+ test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
+ emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
+ label));
+
+ /* If we've determined that we're only loading scalars (and not
+ vector data) then we can store doubles instead. */
+ if (crtl->stack_alignment_needed < 128)
+ smode = DFmode;
+ else
+ smode = V4SFmode;
- nsse_reg = gen_reg_rtx (Pmode);
- emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
-
- /* Compute address of memory block we save into. We always use pointer
- pointing 127 bytes after first byte to store - this is needed to keep
- instruction size limited by 4 bytes (5 bytes for AVX) with one
- byte displacement. */
- tmp_reg = gen_reg_rtx (Pmode);
- emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
- plus_constant (save_area,
- ix86_varargs_gpr_size + 127)));
- mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
- MEM_NOTRAP_P (mem) = 1;
- set_mem_alias_set (mem, set);
- set_mem_align (mem, 64);
+ max = cum->sse_regno + cfun->va_list_fpr_size / 16;
+ if (max > X86_64_SSE_REGPARM_MAX)
+ max = X86_64_SSE_REGPARM_MAX;
- /* And finally do the dirty job! */
- emit_insn (gen_sse_prologue_save (mem, nsse_reg,
- GEN_INT (cum->sse_regno), label,
- gen_reg_rtx (Pmode)));
+ for (i = cum->sse_regno; i < max; ++i)
+ {
+ mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
+ mem = gen_rtx_MEM (smode, mem);
+ MEM_NOTRAP_P (mem) = 1;
+ set_mem_alias_set (mem, set);
+ set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
+
+ emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
+ }
+
+ emit_label (label);
}
}
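
A note on the rewritten setup_incoming_varargs_64 above: the computed-jump
sse_prologue_save machinery is gone, replaced by a cbranchqi4 comparing %al
against zero that simply branches around a straight line of per-register
stores, with each SSE slot 16 bytes wide and placed after the GPR save area.
Below is a minimal, illustrative sketch (not GCC code) of the slot offsets
those two loops produce; print_varargs_save_slots, first_gpr, first_sse and
gpr_save_size are hypothetical names, and the limits simply mirror
X86_64_REGPARM_MAX (6) and X86_64_SSE_REGPARM_MAX (8).

#include <stdio.h>

/* Print the byte offsets, relative to the start of the varargs register
   save area, that the GPR and SSE loops in setup_incoming_varargs_64
   store to.  */
static void
print_varargs_save_slots (int first_gpr, int first_sse, int gpr_save_size)
{
  int i;

  /* Integer registers: one word-sized slot each, starting at offset 0,
     matching "i * UNITS_PER_WORD" above.  */
  for (i = first_gpr; i < 6; i++)
    printf ("gpr slot %d at offset %d\n", i, i * 8);

  /* SSE registers: 16-byte slots following the GPR area, matching
     "i * 16 + ix86_varargs_gpr_size" above.  */
  for (i = first_sse; i < 8; i++)
    printf ("xmm%d slot at offset %d\n", i, i * 16 + gpr_save_size);
}

int
main (void)
{
  /* E.g. two named integer arguments, one named SSE argument, and the
     full 48-byte GPR area allocated.  */
  print_varargs_save_slots (2, 1, 48);
  return 0;
}

The smode choice mirrors the old movsd/movaps selection: when
crtl->stack_alignment_needed is below 128, only scalar data is loaded from
the slots, so a DFmode store of the low eight bytes of each register
suffices and the save area need not be kept 16-byte aligned.
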
@@ -79,13 +79,11 @@
;; Prologue support
UNSPEC_STACK_ALLOC
UNSPEC_SET_GOT
- UNSPEC_SSE_PROLOGUE_SAVE
UNSPEC_REG_SAVE
UNSPEC_DEF_CFA
UNSPEC_SET_RIP
UNSPEC_SET_GOT_OFFSET
UNSPEC_MEMORY_BLOCKAGE
- UNSPEC_SSE_PROLOGUE_SAVE_LOW
;; TLS support
UNSPEC_TP
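
For reference, the patterns removed below implemented the old scheme: the
block of xmm0-xmm7 stores was wrapped in an UNSPEC, and after reload a
computed jump into that fixed-length store sequence was emitted so that only
the registers which may actually carry variadic arguments (xmm<named>
through xmm<%al - 1>) were written.  Here is a small illustrative program
(not GCC code) working through the jump-target arithmetic from the removed
define_split, assuming X86_64_SSE_REGPARM_MAX of 8:

#include <stdio.h>

int
main (void)
{
  /* One save instruction is 4 bytes for movaps, 5 for movsd or any AVX
     encoding; the split picks the multiplier accordingly.  */
  int size = 4;   /* movaps */
  int al = 5;     /* caller used xmm0..xmm4, as reported in %al */
  int named = 2;  /* xmm0 and xmm1 hold named arguments */

  /* Target = label + named*size - al*size.  The label sits after the
     stores for xmm7 down to xmm<named>, so jumping back by
     (al - named) * size bytes executes exactly the stores for
     xmm<named>..xmm<al-1>.  */
  printf ("jump to label - %d bytes; %d of %d stores execute\n",
          (al - named) * size, al - named, 8 - named);
  return 0;
}

This is also why the removed code biased the save-area pointer by 127 bytes:
keeping every displacement within one signed byte kept each store at the
fixed length that the pattern's "length" attribute assumes.
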
@@ -17825,179 +17823,6 @@
{ return ASM_SHORT "0x0b0f"; }
[(set_attr "length" "2")])
-(define_expand "sse_prologue_save"
- [(parallel [(set (match_operand:BLK 0 "" "")
- (unspec:BLK [(reg:DI XMM0_REG)
- (reg:DI XMM1_REG)
- (reg:DI XMM2_REG)
- (reg:DI XMM3_REG)
- (reg:DI XMM4_REG)
- (reg:DI XMM5_REG)
- (reg:DI XMM6_REG)
- (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
- (clobber (reg:CC FLAGS_REG))
- (clobber (match_operand:DI 1 "register_operand" ""))
- (use (match_operand:DI 2 "immediate_operand" ""))
- (use (label_ref:DI (match_operand 3 "" "")))
- (clobber (match_operand:DI 4 "register_operand" ""))
- (use (match_dup 1))])]
- "TARGET_64BIT"
- "")
-
-;; Pre-reload version of prologue save. Until after prologue generation we don't know
-;; what the size of save instruction will be.
-;; Operand 0+operand 6 is the memory save area
-;; Operand 1 is number of registers to save (will get overwritten to operand 5)
- ;; Operand 2 is the number of non-varargs SSE arguments
-;; Operand 3 is label starting the save block
-;; Operand 4 is used for temporary computation of jump address
-(define_insn "*sse_prologue_save_insn1"
- [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
- (match_operand:DI 6 "const_int_operand" "n")))
- (unspec:BLK [(reg:DI XMM0_REG)
- (reg:DI XMM1_REG)
- (reg:DI XMM2_REG)
- (reg:DI XMM3_REG)
- (reg:DI XMM4_REG)
- (reg:DI XMM5_REG)
- (reg:DI XMM6_REG)
- (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
- (clobber (reg:CC FLAGS_REG))
- (clobber (match_operand:DI 1 "register_operand" "=r"))
- (use (match_operand:DI 2 "const_int_operand" "i"))
- (use (label_ref:DI (match_operand 3 "" "X")))
- (clobber (match_operand:DI 4 "register_operand" "=&r"))
- (use (match_operand:DI 5 "register_operand" "1"))]
- "TARGET_64BIT
- && INTVAL (operands[6]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128
- && INTVAL (operands[6]) + INTVAL (operands[2]) * 16 >= -128"
- "#"
- [(set_attr "type" "other")
- (set_attr "memory" "store")
- (set_attr "mode" "DI")])
-
-;; We know size of save instruction; expand the computation of jump address
-;; in the jumptable.
-(define_split
- [(parallel [(set (match_operand:BLK 0 "" "")
- (unspec:BLK [(reg:DI XMM0_REG)
- (reg:DI XMM1_REG)
- (reg:DI XMM2_REG)
- (reg:DI XMM3_REG)
- (reg:DI XMM4_REG)
- (reg:DI XMM5_REG)
- (reg:DI XMM6_REG)
- (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
- (clobber (reg:CC FLAGS_REG))
- (clobber (match_operand:DI 1 "register_operand" ""))
- (use (match_operand:DI 2 "const_int_operand" ""))
- (use (match_operand 3 "" ""))
- (clobber (match_operand:DI 4 "register_operand" ""))
- (use (match_operand:DI 5 "register_operand" ""))])]
- "reload_completed"
- [(parallel [(set (match_dup 0)
- (unspec:BLK [(reg:DI XMM0_REG)
- (reg:DI XMM1_REG)
- (reg:DI XMM2_REG)
- (reg:DI XMM3_REG)
- (reg:DI XMM4_REG)
- (reg:DI XMM5_REG)
- (reg:DI XMM6_REG)
- (reg:DI XMM7_REG)]
- UNSPEC_SSE_PROLOGUE_SAVE_LOW))
- (use (match_dup 1))
- (use (match_dup 2))
- (use (match_dup 3))
- (use (match_dup 5))])]
-{
- /* Movaps is 4 bytes, AVX and movsd is 5 bytes. */
- int size = 4 + (TARGET_AVX || crtl->stack_alignment_needed < 128);
-
- /* Compute address to jump to:
- label - eax*size + nnamed_sse_arguments*size. */
- if (size == 5)
- emit_insn (gen_rtx_SET (VOIDmode, operands[4],
- gen_rtx_PLUS
- (Pmode,
- gen_rtx_MULT (Pmode, operands[1],
- GEN_INT (4)),
- operands[1])));
- else if (size == 4)
- emit_insn (gen_rtx_SET (VOIDmode, operands[4],
- gen_rtx_MULT (Pmode, operands[1],
- GEN_INT (4))));
- else
- gcc_unreachable ();
- if (INTVAL (operands[2]))
- emit_move_insn
- (operands[1],
- gen_rtx_CONST (DImode,
- gen_rtx_PLUS (DImode,
- operands[3],
- GEN_INT (INTVAL (operands[2])
- * size))));
- else
- emit_move_insn (operands[1], operands[3]);
- emit_insn (gen_subdi3 (operands[1], operands[1], operands[4]));
- operands[5] = GEN_INT (size);
-})
-
-(define_insn "sse_prologue_save_insn"
- [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
- (match_operand:DI 4 "const_int_operand" "n")))
- (unspec:BLK [(reg:DI XMM0_REG)
- (reg:DI XMM1_REG)
- (reg:DI XMM2_REG)
- (reg:DI XMM3_REG)
- (reg:DI XMM4_REG)
- (reg:DI XMM5_REG)
- (reg:DI XMM6_REG)
- (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE_LOW))
- (use (match_operand:DI 1 "register_operand" "r"))
- (use (match_operand:DI 2 "const_int_operand" "i"))
- (use (label_ref:DI (match_operand 3 "" "X")))
- (use (match_operand:DI 5 "const_int_operand" "i"))]
- "TARGET_64BIT
- && INTVAL (operands[4]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128
- && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128"
-{
- int i;
- operands[0] = gen_rtx_MEM (Pmode,
- gen_rtx_PLUS (Pmode, operands[0], operands[4]));
- /* VEX instruction with a REX prefix will #UD. */
- if (TARGET_AVX && GET_CODE (XEXP (operands[0], 0)) != PLUS)
- gcc_unreachable ();
-
- output_asm_insn ("jmp\t%A1", operands);
- for (i = X86_64_SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--)
- {
- operands[4] = adjust_address (operands[0], DImode, i*16);
- operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i));
- PUT_MODE (operands[4], TImode);
- if (GET_CODE (XEXP (operands[0], 0)) != PLUS)
- output_asm_insn ("rex", operands);
- if (crtl->stack_alignment_needed < 128)
- output_asm_insn ("%vmovsd\t{%5, %4|%4, %5}", operands);
- else
- output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands);
- }
- targetm.asm_out.internal_label (asm_out_file, "L",
- CODE_LABEL_NUMBER (operands[3]));
- return "";
-}
- [(set_attr "type" "other")
- (set_attr "length_immediate" "0")
- (set_attr "length_address" "0")
- ;; 2 bytes for the jump and operands[5] bytes for each save.
- (set (attr "length")
- (plus (const_int 2)
- (mult (symbol_ref ("INTVAL (operands[5])"))
- (symbol_ref ("X86_64_SSE_REGPARM_MAX - INTVAL (operands[2])")))))
- (set_attr "memory" "store")
- (set_attr "modrm" "0")
- (set_attr "prefix" "maybe_vex")
- (set_attr "mode" "DI")])
-
(define_expand "prefetch"
[(prefetch (match_operand 0 "address_operand" "")
(match_operand:SI 1 "const_int_operand" "")