[RFC,i386] : Rewrite pop patterns to use POST_INC memory operands

Message ID	AANLkTikEQw1gdzwC8DFSwp4MWJ1L_OUu68bpf6UT4cM8@mail.gmail.com
State	New
Headers	show Return-Path: <gcc-patches-return-271576-incoming=patchwork.ozlabs.org@gcc.gnu.org> MIME-Version: 1.0 Date: Mon, 30 Aug 2010 19:15:49 +0200 Message-ID: <AANLkTikEQw1gdzwC8DFSwp4MWJ1L_OUu68bpf6UT4cM8@mail.gmail.com> Subject: [RFC PATCH, i386]: Rewrite pop patterns to use POST_INC memory operands From: Uros Bizjak <ubizjak@gmail.com> To: gcc-patches@gcc.gnu.org Cc: Richard Henderson <rth@redhat.com>, jh@suse.cz, Jakub Jelinek <jakub@redhat.com> Content-Type: multipart/mixed; boundary=0015175d0a82b1043d048f0d9d0a Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org

Index: i386.md =================================================================== --- i386.md (revision 163645) +++ i386.md (working copy) @@ -1636,14 +1636,7 @@ "" "ix86_expand_move (<MODE>mode, operands); DONE;") -;; Push/pop instructions. They are separate since autoinc/dec is not a -;; general_operand. -;; -;; %%% We don't use a post-inc memory reference because x86 is not a -;; general AUTO_INC_DEC host, which impacts how it is treated in flow. -;; Changing this impacts compiler performance on other non-AUTO_INC_DEC -;; targets without our curiosities, and it is just as easy to represent -;; this differently. +;; Push/pop instructions. (define_insn "*pushdi2_rex64" [(set (match_operand:DI 0 "push_operand" "=<,!<") @@ -1756,47 +1749,22 @@ [(set_attr "type" "push") (set_attr "mode" "<MODE>")]) -(define_insn "popdi1" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r*m") - (mem:DI (reg:DI SP_REG))) - (set (reg:DI SP_REG) - (plus:DI (reg:DI SP_REG) (const_int 8)))] - "TARGET_64BIT" - "pop{q}\t%0" - [(set_attr "type" "pop") - (set_attr "mode" "DI")]) - -(define_insn "popsi1" - [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m") - (mem:SI (reg:SI SP_REG))) - (set (reg:SI SP_REG) - (plus:SI (reg:SI SP_REG) (const_int 4)))] - "!TARGET_64BIT" - "pop{l}\t%0" - [(set_attr "type" "pop") - (set_attr "mode" "SI")]) - -(define_insn "*popdi1_epilogue" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r*m") - (mem:DI (reg:DI SP_REG))) - (set (reg:DI SP_REG) - (plus:DI (reg:DI SP_REG) (const_int 8))) - (clobber (mem:BLK (scratch)))] - "TARGET_64BIT" - "pop{q}\t%0" +(define_insn "pop<mode>1" + [(set (match_operand:P 0 "nonimmediate_operand" "=r*m") + (match_operand:P 1 "pop_operand" ">"))] + "" + "pop{<imodesuffix>}\t%0" [(set_attr "type" "pop") - (set_attr "mode" "DI")]) + (set_attr "mode" "<MODE>")]) -(define_insn "*popsi1_epilogue" - [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m") - (mem:SI (reg:SI SP_REG))) - (set (reg:SI SP_REG) - (plus:SI (reg:SI SP_REG) (const_int 4))) +(define_insn "*pop<mode>1_epilogue" + [(set (match_operand:P 0 "nonimmediate_operand" "=r*m") + (match_operand:P 1 "pop_operand" ">")) (clobber (mem:BLK (scratch)))] - "!TARGET_64BIT" - "pop{l}\t%0" + "" + "pop{<imodesuffix>}\t%0" [(set_attr "type" "pop") - (set_attr "mode" "SI")]) + (set_attr "mode" "<MODE>")]) (define_insn "*mov<mode>_xor" [(set (match_operand:SWI48 0 "register_operand" "=r") @@ -17037,12 +17005,13 @@ "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));") ;; The ESP adjustments can be done by the push and pop instructions. Resulting -;; code is shorter, since push is only 1 byte, while add imm, %esp 3 bytes. On -;; many CPUs it is also faster, since special hardware to avoid esp +;; code is shorter, since push is only 1 byte, while add imm, %esp is 3 bytes. +;; On many CPUs it is also faster, since special hardware to avoid esp ;; dependencies is present. -;; While some of these conversions may be done using splitters, we use peepholes -;; in order to allow combine_stack_adjustments pass to see nonobfuscated RTL. +;; While some of these conversions may be done using splitters, we use +;; peepholes in order to allow combine_stack_adjustments pass to see +;; nonobfuscated RTL. ;; Convert prologue esp subtractions to push. ;; We need register to push. In order to keep verify_flow_info happy we have @@ -17101,13 +17070,11 @@ (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] "optimize_insn_for_size_p () || !TARGET_ADD_ESP_4" - [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) - (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4))) - (clobber (mem:BLK (scratch)))])] - "") + [(parallel [(set (match_dup 0) (mem:SI (post_inc:SI (reg:SI SP_REG)))) + (clobber (mem:BLK (scratch)))])]) -;; Two pops case is tricky, since pop causes dependency on destination register. -;; We use two registers if available. +;; Two pops case is tricky, since pop causes dependency +;; on destination register. We use two registers if available. (define_peephole2 [(match_scratch:SI 0 "r") (match_scratch:SI 1 "r") @@ -17115,12 +17082,9 @@ (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] "optimize_insn_for_size_p () || !TARGET_ADD_ESP_8" - [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) - (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4))) + [(parallel [(set (match_dup 0) (mem:SI (post_inc:SI (reg:SI SP_REG)))) (clobber (mem:BLK (scratch)))]) - (parallel [(set (match_dup 1) (mem:SI (reg:SI SP_REG))) - (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] - "") + (set (match_dup 1) (mem:SI (post_inc:SI (reg:SI SP_REG))))]) (define_peephole2 [(match_scratch:SI 0 "r") @@ -17128,12 +17092,9 @@ (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] "optimize_insn_for_size_p ()" - [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) - (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4))) + [(parallel [(set (match_dup 0) (mem:SI (post_inc:SI (reg:SI SP_REG)))) (clobber (mem:BLK (scratch)))]) - (parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) - (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] - "") + (set (match_dup 0) (mem:SI (post_inc:SI (reg:SI SP_REG))))]) ;; Convert esp additions to pop. (define_peephole2 @@ -17141,34 +17102,26 @@ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4))) (clobber (reg:CC FLAGS_REG))])] "" - [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) - (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] - "") + [(set (match_dup 0) (mem:SI (post_inc:SI (reg:SI SP_REG))))]) -;; Two pops case is tricky, since pop causes dependency on destination register. -;; We use two registers if available. +;; Two pops case is tricky, since pop causes dependency +;; on destination register. We use two registers if available. (define_peephole2 [(match_scratch:SI 0 "r") (match_scratch:SI 1 "r") (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8))) (clobber (reg:CC FLAGS_REG))])] "" - [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) - (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))]) - (parallel [(set (match_dup 1) (mem:SI (reg:SI SP_REG))) - (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] - "") + [(set (match_dup 0) (mem:SI (post_inc:SI (reg:SI SP_REG)))) + (set (match_dup 1) (mem:SI (post_inc:SI (reg:SI SP_REG))))]) (define_peephole2 [(match_scratch:SI 0 "r") (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8))) (clobber (reg:CC FLAGS_REG))])] "optimize_insn_for_size_p ()" - [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) - (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))]) - (parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) - (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] - "") + [(set (match_dup 0) (mem:SI (post_inc:SI (reg:SI SP_REG)))) + (set (match_dup 0) (mem:SI (post_inc:SI (reg:SI SP_REG))))]) (define_peephole2 [(match_scratch:DI 0 "r") @@ -17216,13 +17169,11 @@ (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] "optimize_insn_for_size_p () || !TARGET_ADD_ESP_4" - [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) - (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) - (clobber (mem:BLK (scratch)))])] - "") + [(parallel [(set (match_dup 0) (mem:DI (post_inc:DI (reg:DI SP_REG)))) + (clobber (mem:BLK (scratch)))])]) -;; Two pops case is tricky, since pop causes dependency on destination register. -;; We use two registers if available. +;; Two pops case is tricky, since pop causes dependency +;; on destination register. We use two registers if available. (define_peephole2 [(match_scratch:DI 0 "r") (match_scratch:DI 1 "r") @@ -17230,12 +17181,9 @@ (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] "optimize_insn_for_size_p () || !TARGET_ADD_ESP_8" - [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) - (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) + [(parallel [(set (match_dup 0) (mem:DI (post_inc:DI (reg:DI SP_REG)))) (clobber (mem:BLK (scratch)))]) - (parallel [(set (match_dup 1) (mem:DI (reg:DI SP_REG))) - (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] - "") + (set (match_dup 1) (mem:DI (post_inc:DI (reg:DI SP_REG))))]) (define_peephole2 [(match_scratch:DI 0 "r") @@ -17243,12 +17191,9 @@ (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] "optimize_insn_for_size_p ()" - [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) - (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) + [(parallel [(set (match_dup 0) (mem:DI (post_inc:DI (reg:DI SP_REG)))) (clobber (mem:BLK (scratch)))]) - (parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) - (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] - "") + (set (match_dup 0) (mem:DI (post_inc:DI (reg:DI SP_REG))))]) ;; Convert esp additions to pop. (define_peephole2 @@ -17256,34 +17201,26 @@ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) (clobber (reg:CC FLAGS_REG))])] "" - [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) - (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] - "") + [(set (match_dup 0) (mem:DI (post_inc:DI (reg:DI SP_REG))))]) -;; Two pops case is tricky, since pop causes dependency on destination register. -;; We use two registers if available. +;; Two pops case is tricky, since pop causes dependency +;; on destination register. We use two registers if available. (define_peephole2 [(match_scratch:DI 0 "r") (match_scratch:DI 1 "r") (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16))) (clobber (reg:CC FLAGS_REG))])] "" - [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) - (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))]) - (parallel [(set (match_dup 1) (mem:DI (reg:DI SP_REG))) - (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] - "") + [(set (match_dup 0) (mem:DI (post_inc:DI (reg:DI SP_REG)))) + (set (match_dup 1) (mem:DI (post_inc:DI (reg:DI SP_REG))))]) (define_peephole2 [(match_scratch:DI 0 "r") (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16))) (clobber (reg:CC FLAGS_REG))])] "optimize_insn_for_size_p ()" - [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) - (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))]) - (parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) - (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] - "") + [(set (match_dup 0) (mem:DI (post_inc:DI (reg:DI SP_REG)))) + (set (match_dup 0) (mem:DI (post_inc:DI (reg:DI SP_REG))))]) ;; Convert compares with 1 to shorter inc/dec operations when CF is not ;; required and register dies. Similarly for 128 to -128. Index: i386.c =================================================================== --- i386.c (revision 163630) +++ i386.c (working copy) @@ -1896,7 +1896,6 @@ static const char ix86_force_align_arg_p = "force_align_arg_pointer"; static rtx (*ix86_gen_leave) (void); -static rtx (*ix86_gen_pop1) (rtx); static rtx (*ix86_gen_add3) (rtx, rtx, rtx); static rtx (*ix86_gen_sub3) (rtx, rtx, rtx); static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx); @@ -3655,7 +3654,6 @@ override_options (bool main_args_p) if (TARGET_64BIT) { ix86_gen_leave = gen_leave_rex64; - ix86_gen_pop1 = gen_popdi1; ix86_gen_add3 = gen_adddi3; ix86_gen_sub3 = gen_subdi3; ix86_gen_sub3_carry = gen_subdi3_carry; @@ -3669,7 +3667,6 @@ override_options (bool main_args_p) else { ix86_gen_leave = gen_leave; - ix86_gen_pop1 = gen_popsi1; ix86_gen_add3 = gen_addsi3; ix86_gen_sub3 = gen_subsi3; ix86_gen_sub3_carry = gen_subsi3_carry; @@ -8173,6 +8170,18 @@ gen_push (rtx arg) arg); } +/* Generate an "pop" pattern for input ARG. */ + +static rtx +gen_pop (rtx arg) +{ + return gen_rtx_SET (VOIDmode, + arg, + gen_rtx_MEM (Pmode, + gen_rtx_POST_INC (Pmode, + stack_pointer_rtx))); +} + /* Return >= 0 if there is an unused call-clobbered register available for the entire function. */ @@ -9052,7 +9061,7 @@ release_scratch_register_on_entry (struc { if (sr->saved) { - rtx x, insn = emit_insn (ix86_gen_pop1 (sr->reg)); + rtx x, insn = emit_insn (gen_pop (sr->reg)); /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */ RTX_FRAME_RELATED_P (insn) = 1; @@ -9478,7 +9487,7 @@ ix86_expand_prologue (void) { /* The frame pointer is not needed so pop %ebp again. This leaves us with a pristine state. */ - emit_insn (ix86_gen_pop1 (hard_frame_pointer_rtx)); + emit_insn (gen_pop (hard_frame_pointer_rtx)); } } @@ -9763,7 +9772,7 @@ static void ix86_emit_restore_reg_using_pop (rtx reg) { struct machine_function *m = cfun->machine; - rtx insn = emit_insn (ix86_gen_pop1 (reg)); + rtx insn = emit_insn (gen_pop (reg)); ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset); m->fs.sp_offset -= UNITS_PER_WORD; @@ -9786,10 +9795,12 @@ ix86_emit_restore_reg_using_pop (rtx reg if (m->fs.cfa_reg == stack_pointer_rtx) { - m->fs.cfa_offset -= UNITS_PER_WORD; - add_reg_note (insn, REG_CFA_ADJUST_CFA, - copy_rtx (XVECEXP (PATTERN (insn), 0, 1))); + rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD); + x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x); + add_reg_note (insn, REG_CFA_ADJUST_CFA, x); RTX_FRAME_RELATED_P (insn) = 1; + + m->fs.cfa_offset -= UNITS_PER_WORD; } /* When the frame pointer is the CFA, and we pop it, we are @@ -10206,7 +10217,7 @@ ix86_expand_epilogue (int style) /* There is no "pascal" calling convention in any 64bit ABI. */ gcc_assert (!TARGET_64BIT); - insn = emit_insn (gen_popsi1 (ecx)); + insn = emit_insn (gen_pop (ecx)); m->fs.cfa_offset -= UNITS_PER_WORD; m->fs.sp_offset -= UNITS_PER_WORD;

[RFC,i386] : Rewrite pop patterns to use POST_INC memory operands

Commit Message

Comments

Patch