Message ID | AANLkTiko4NUrkb4DAYNfz06B_Ys7GRkVarFGojOa0=6K@mail.gmail.com |
---|---|
State | New |
Headers | show |
On 03/18/2011 01:40 PM, Uros Bizjak wrote: > if (<X87MODEF:MODE>mode == SFmode) > - insn = gen_truncxfsf2 (operands[0], reg); > + insn = gen_truncxfsf2; > else if (<X87MODEF:MODE>mode == DFmode) > - insn = gen_truncxfdf2 (operands[0], reg); > + insn = gen_truncxfdf2; > else > gcc_unreachable (); Why is this a good thing? Surely the direct calls are much better predicted by the CPU... I can certainly understand sinking the call to emit_insn, as in the second hunk; that ought to save code size. Though the compiler really ought to be able to figure that out itself. r~
On Fri, Mar 18, 2011 at 10:08 PM, Richard Henderson <rth@redhat.com> wrote: > On 03/18/2011 01:40 PM, Uros Bizjak wrote: >> if (<X87MODEF:MODE>mode == SFmode) >> - insn = gen_truncxfsf2 (operands[0], reg); >> + insn = gen_truncxfsf2; >> else if (<X87MODEF:MODE>mode == DFmode) >> - insn = gen_truncxfdf2 (operands[0], reg); >> + insn = gen_truncxfdf2; >> else >> gcc_unreachable (); > > Why is this a good thing? Surely the direct calls are much > better predicted by the CPU... I was hoping that cmove will be generated in this particular case for 64bit targets. I'm not aware of any other differences between direct and indirect calls... OTOH, call arguments do not need to be set-up twice when indirect call is used. Please note that these are not CSE'd as shown by comparing two examples below on 32bit i386: --cut here-- #include <stdio.h> #include <stdlib.h> int foo (int, int); int bar (int, int); void test (int a, int b, int c) { int x; if (c == 1) x = foo (a, b); else if (c == 2) x = bar (a, b); else abort (); printf ("%i\n", x); } void test_ (int a, int b, int c) { int (*x)(int, int); if (c == 1) x = foo; else if (c == 2) x = bar; else abort (); printf ("%i\n", x (a, b)); } --cut here-- test: subl $28, %esp movl 40(%esp), %eax movl 32(%esp), %edx movl 36(%esp), %ecx cmpl $1, %eax je .L6 cmpl $2, %eax jne .L4 movl %ecx, 4(%esp) movl %edx, (%esp) call bar .L3: movl %eax, 36(%esp) movl $.LC0, 32(%esp) addl $28, %esp jmp printf .p2align 4,,7 .p2align 3 .L6: movl %ecx, 4(%esp) movl %edx, (%esp) call foo jmp .L3 .L4: call abort test_: subl $28, %esp movl 40(%esp), %eax movl 32(%esp), %edx movl 36(%esp), %ecx cmpl $1, %eax je .L9 cmpl $2, %eax jne .L11 movl $bar, %eax .L8: movl %ecx, 4(%esp) movl %edx, (%esp) call *%eax movl $.LC0, 32(%esp) movl %eax, 36(%esp) addl $28, %esp jmp printf .p2align 4,,7 .p2align 3 .L9: .cfi_restore_state movl $foo, %eax jmp .L8 .L11: call abort > I can certainly understand sinking the call to emit_insn, as > in the second hunk; that ought to save code size. Though the > compiler really ought to be able to figure that out itself. Currently, it does not. Uros.
Index: i386.md =================================================================== --- i386.md (revision 171163) +++ i386.md (working copy) @@ -4959,18 +4959,18 @@ && !X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SSEMODEI24:MODE>mode)) { rtx reg = gen_reg_rtx (XFmode); - rtx insn; + rtx (*insn)(rtx, rtx); emit_insn (gen_float<SSEMODEI24:mode>xf2 (reg, operands[1])); if (<X87MODEF:MODE>mode == SFmode) - insn = gen_truncxfsf2 (operands[0], reg); + insn = gen_truncxfsf2; else if (<X87MODEF:MODE>mode == DFmode) - insn = gen_truncxfdf2 (operands[0], reg); + insn = gen_truncxfdf2; else gcc_unreachable (); - emit_insn (insn); + emit_insn (insn (operands[0], reg)); DONE; } }) @@ -18216,10 +18216,13 @@ (unspec_volatile [(const_int 0)] UNSPECV_SLWP_INTRINSIC))] "TARGET_LWP" { - if (TARGET_64BIT) - emit_insn (gen_lwp_slwpcbdi (operands[0])); - else - emit_insn (gen_lwp_slwpcbsi (operands[0])); + rtx (*insn)(rtx); + + insn = (TARGET_64BIT + ? gen_lwp_slwpcbdi + : gen_lwp_slwpcbsi); + + emit_insn (insn (operands[0])); DONE; }) Index: sse.md =================================================================== --- sse.md (revision 171163) +++ sse.md (working copy) @@ -4122,17 +4122,21 @@ (match_operand:SI 2 "const_0_to_1_operand" "")] "TARGET_AVX" { + rtx (*insn)(rtx, rtx); + switch (INTVAL (operands[2])) { case 0: - emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1])); + insn = gen_vec_extract_lo_<mode>; break; case 1: - emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1])); + insn = gen_vec_extract_hi_<mode>; break; default: gcc_unreachable (); } + + emit_insn (insn (operands[0], operands[1])); DONE; }) @@ -11776,19 +11780,21 @@ (match_operand:SI 3 "const_0_to_1_operand" "")] "TARGET_AVX" { + rtx (*insn)(rtx, rtx, rtx); + switch (INTVAL (operands[3])) { case 0: - emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1], - operands[2])); + insn = gen_vec_set_lo_<mode>; break; case 1: - emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1], - operands[2])); + insn = gen_vec_set_hi_<mode>; break; default: gcc_unreachable (); } + + emit_insn (insn (operands[0], operands[1], operands[2])); DONE; })