Message ID | 20211129013246.77321-1-hongtao.liu@intel.com |
---|---|
State | New |
Headers | show |
Series | Fix regression introduced by r12-5536. | expand |
On Mon, Nov 29, 2021 at 2:32 AM liuhongt <hongtao.liu@intel.com> wrote: > > There're several failures reported in [1]: > 1. unsupported instruction `pextrw` for "pextrw $0, %xmm31, 16(%rax)" > %vpextrw should be used in output templates. > 2. ICE in get_attr_memory for movhi_internal since some alternatives > are marked as TYPE_SSELOG. > Explicitly set memory_attr for those alternatives. > > Also this patch fixs a typo and some latent bugs which are related to > moving HImode from/to sse register w/o TARGET_AVX512FP16. > > For optimization issues discussed in PR102811, I'll create another patch for > it. > [1] https://gcc.gnu.org/pipermail/gcc-regression/2021-November/075893.html > > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,} and > x86_64-pc-linux-gnu{-m32\ -march=cascadelake,\ -march=cascadelake} > Ok for trunk? > > gcc/ChangeLog: > > * config/i386/i386.c (ix86_secondary_reload): Without > TARGET_SSE4_1, General register is needed to move HImode from > sse register to memory. > * config/i386/sse.md (*vec_extrachf): Use %vpextrw instead of > pextrw in output templates. > * config/i386/i386.md (movhi_internal): Ditto, also fix typo of > MEM_P (operands[1]) and adjust memory/mode/prefix/type > attribute for alternatives related to sse register. OK, but please use sselog1 type instead so you don't need to introduce the memory attribute. Thanks, Uros. > --- > gcc/config/i386/i386.c | 2 +- > gcc/config/i386/i386.md | 44 ++++++++++++++++++++++++++++++----------- > gcc/config/i386/sse.md | 6 +++--- > 3 files changed, 36 insertions(+), 16 deletions(-) > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index 3dedf522c42..7cf599f57f7 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -19277,7 +19277,7 @@ ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass, > } > > /* Require movement to gpr, and then store to memory. */ > - if (mode == HFmode > + if ((mode == HFmode || mode == HImode) > && !TARGET_SSE4_1 > && SSE_CLASS_P (rclass) > && !in_p && MEM_P (x)) > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index 68606e57e60..2cb3e727588 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -2528,12 +2528,12 @@ (define_insn "*movhi_internal" > case TYPE_SSELOG: > if (SSE_REG_P (operands[0])) > return MEM_P (operands[1]) > - ? "pinsrw\t{$0, %1, %0|%0, %1, 0}" > - : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}"; > + ? "%vpinsrw\t{$0, %1, %0|%0, %1, 0}" > + : "%vpinsrw\t{$0, %k1, %0|%0, %k1, 0}"; > else > - return MEM_P (operands[1]) > - ? "pextrw\t{$0, %1, %0|%0, %1, 0}" > - : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}"; > + return MEM_P (operands[0]) > + ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}" > + : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}"; > > case TYPE_MSKLOG: > if (operands[1] == const0_rtx) > @@ -2557,12 +2557,14 @@ (define_insn "*movhi_internal" > ] > (const_string "*"))) > (set (attr "type") > - (cond [(eq_attr "alternative" "9,10,11,12,13") > + (cond [(eq_attr "alternative" "9,10,12,13") > (if_then_else (match_test "TARGET_AVX512FP16") > (const_string "ssemov") > (const_string "sselog")) > (eq_attr "alternative" "4,5,6,7") > (const_string "mskmov") > + (eq_attr "alternative" "11") > + (const_string "ssemov") > (eq_attr "alternative" "8") > (const_string "msklog") > (match_test "optimize_function_for_size_p (cfun)") > @@ -2579,15 +2581,33 @@ (define_insn "*movhi_internal" > (const_string "imovx") > ] > (const_string "imov"))) > + (set (attr "memory") > + (cond [(eq_attr "alternative" "9,10") > + (const_string "none") > + (eq_attr "alternative" "12") > + (const_string "load") > + (eq_attr "alternative" "13") > + (const_string "store") > + ] > + (const_string "*"))) Please use sselog1 type instead, and the memory attribute will be calculated correctly. > (set (attr "prefix") > - (if_then_else (eq_attr "alternative" "4,5,6,7,8") > - (const_string "vex") > - (const_string "orig"))) > + (cond [(eq_attr "alternative" "9,10,11,12,13") > + (const_string "maybe_evex") > + (eq_attr "alternative" "4,5,6,7,8") > + (const_string "vex") > + ] > + (const_string "orig"))) > (set (attr "mode") > (cond [(eq_attr "type" "imovx") > (const_string "SI") > + (eq_attr "alternative" "9,10,12,13") > + (if_then_else (match_test "TARGET_AVX512FP16") > + (const_string "HI") > + (const_string "TI")) > (eq_attr "alternative" "11") > - (const_string "HF") > + (if_then_else (match_test "TARGET_AVX512FP16") > + (const_string "HF") > + (const_string "SF")) > (and (eq_attr "alternative" "1,2") > (match_operand:HI 1 "aligned_operand")) > (const_string "SI") > @@ -3791,9 +3811,9 @@ (define_insn "*movhf_internal" > ? "pinsrw\t{$0, %1, %0|%0, %1, 0}" > : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}"; > else > - return MEM_P (operands[1]) > + return MEM_P (operands[0]) > ? "pextrw\t{$0, %1, %0|%0, %1, 0}" > - : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}"; > + : "pextrw\t{$0, %1, %k0|%k0, %1, 0}"; > > default: > gcc_unreachable (); > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index b109c2aa8fa..5229b23af98 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -11315,9 +11315,9 @@ (define_insn "*vec_extracthf" > switch (which_alternative) > { > case 0: > - return "vpextrw\t{%2, %1, %k0|%k0, %1, %2}"; > + return "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"; > case 1: > - return "vpextrw\t{%2, %1, %0|%0, %1, %2}"; > + return "%vpextrw\t{%2, %1, %0|%0, %1, %2}"; > > case 2: > operands[2] = GEN_INT (INTVAL (operands[2]) * 2); > @@ -11330,7 +11330,7 @@ (define_insn "*vec_extracthf" > gcc_unreachable (); > } > } > - [(set_attr "isa" "*,*,noavx,avx") > + [(set_attr "isa" "*,sse4,noavx,avx") > (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1") > (set_attr "prefix" "maybe_evex") > (set_attr "mode" "TI")]) > -- > 2.18.1 >
On Mon, Nov 29, 2021 at 3:53 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > On Mon, Nov 29, 2021 at 2:32 AM liuhongt <hongtao.liu@intel.com> wrote: > > > > There're several failures reported in [1]: > > 1. unsupported instruction `pextrw` for "pextrw $0, %xmm31, 16(%rax)" > > %vpextrw should be used in output templates. > > 2. ICE in get_attr_memory for movhi_internal since some alternatives > > are marked as TYPE_SSELOG. > > Explicitly set memory_attr for those alternatives. > > > > Also this patch fixs a typo and some latent bugs which are related to > > moving HImode from/to sse register w/o TARGET_AVX512FP16. > > > > For optimization issues discussed in PR102811, I'll create another patch for > > it. > > [1] https://gcc.gnu.org/pipermail/gcc-regression/2021-November/075893.html > > > > > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,} and > > x86_64-pc-linux-gnu{-m32\ -march=cascadelake,\ -march=cascadelake} > > Ok for trunk? > > > > gcc/ChangeLog: > > > > * config/i386/i386.c (ix86_secondary_reload): Without > > TARGET_SSE4_1, General register is needed to move HImode from > > sse register to memory. > > * config/i386/sse.md (*vec_extrachf): Use %vpextrw instead of > > pextrw in output templates. > > * config/i386/i386.md (movhi_internal): Ditto, also fix typo of > > MEM_P (operands[1]) and adjust memory/mode/prefix/type > > attribute for alternatives related to sse register. > > OK, but please use sselog1 type instead so you don't need to introduce > the memory attribute. Changed, committed as r12-5573 thanks for the review. > > Thanks, > Uros. > > > --- > > gcc/config/i386/i386.c | 2 +- > > gcc/config/i386/i386.md | 44 ++++++++++++++++++++++++++++++----------- > > gcc/config/i386/sse.md | 6 +++--- > > 3 files changed, 36 insertions(+), 16 deletions(-) > > > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > > index 3dedf522c42..7cf599f57f7 100644 > > --- a/gcc/config/i386/i386.c > > +++ b/gcc/config/i386/i386.c > > @@ -19277,7 +19277,7 @@ ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass, > > } > > > > /* Require movement to gpr, and then store to memory. */ > > - if (mode == HFmode > > + if ((mode == HFmode || mode == HImode) > > && !TARGET_SSE4_1 > > && SSE_CLASS_P (rclass) > > && !in_p && MEM_P (x)) > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > > index 68606e57e60..2cb3e727588 100644 > > --- a/gcc/config/i386/i386.md > > +++ b/gcc/config/i386/i386.md > > @@ -2528,12 +2528,12 @@ (define_insn "*movhi_internal" > > case TYPE_SSELOG: > > if (SSE_REG_P (operands[0])) > > return MEM_P (operands[1]) > > - ? "pinsrw\t{$0, %1, %0|%0, %1, 0}" > > - : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}"; > > + ? "%vpinsrw\t{$0, %1, %0|%0, %1, 0}" > > + : "%vpinsrw\t{$0, %k1, %0|%0, %k1, 0}"; > > else > > - return MEM_P (operands[1]) > > - ? "pextrw\t{$0, %1, %0|%0, %1, 0}" > > - : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}"; > > + return MEM_P (operands[0]) > > + ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}" > > + : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}"; > > > > case TYPE_MSKLOG: > > if (operands[1] == const0_rtx) > > @@ -2557,12 +2557,14 @@ (define_insn "*movhi_internal" > > ] > > (const_string "*"))) > > (set (attr "type") > > - (cond [(eq_attr "alternative" "9,10,11,12,13") > > + (cond [(eq_attr "alternative" "9,10,12,13") > > (if_then_else (match_test "TARGET_AVX512FP16") > > (const_string "ssemov") > > (const_string "sselog")) > > (eq_attr "alternative" "4,5,6,7") > > (const_string "mskmov") > > + (eq_attr "alternative" "11") > > + (const_string "ssemov") > > (eq_attr "alternative" "8") > > (const_string "msklog") > > (match_test "optimize_function_for_size_p (cfun)") > > @@ -2579,15 +2581,33 @@ (define_insn "*movhi_internal" > > (const_string "imovx") > > ] > > (const_string "imov"))) > > + (set (attr "memory") > > + (cond [(eq_attr "alternative" "9,10") > > + (const_string "none") > > + (eq_attr "alternative" "12") > > + (const_string "load") > > + (eq_attr "alternative" "13") > > + (const_string "store") > > + ] > > + (const_string "*"))) > > Please use sselog1 type instead, and the memory attribute will be > calculated correctly. > > > (set (attr "prefix") > > - (if_then_else (eq_attr "alternative" "4,5,6,7,8") > > - (const_string "vex") > > - (const_string "orig"))) > > + (cond [(eq_attr "alternative" "9,10,11,12,13") > > + (const_string "maybe_evex") > > + (eq_attr "alternative" "4,5,6,7,8") > > + (const_string "vex") > > + ] > > + (const_string "orig"))) > > (set (attr "mode") > > (cond [(eq_attr "type" "imovx") > > (const_string "SI") > > + (eq_attr "alternative" "9,10,12,13") > > + (if_then_else (match_test "TARGET_AVX512FP16") > > + (const_string "HI") > > + (const_string "TI")) > > (eq_attr "alternative" "11") > > - (const_string "HF") > > + (if_then_else (match_test "TARGET_AVX512FP16") > > + (const_string "HF") > > + (const_string "SF")) > > (and (eq_attr "alternative" "1,2") > > (match_operand:HI 1 "aligned_operand")) > > (const_string "SI") > > @@ -3791,9 +3811,9 @@ (define_insn "*movhf_internal" > > ? "pinsrw\t{$0, %1, %0|%0, %1, 0}" > > : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}"; > > else > > - return MEM_P (operands[1]) > > + return MEM_P (operands[0]) > > ? "pextrw\t{$0, %1, %0|%0, %1, 0}" > > - : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}"; > > + : "pextrw\t{$0, %1, %k0|%k0, %1, 0}"; > > > > default: > > gcc_unreachable (); > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > > index b109c2aa8fa..5229b23af98 100644 > > --- a/gcc/config/i386/sse.md > > +++ b/gcc/config/i386/sse.md > > @@ -11315,9 +11315,9 @@ (define_insn "*vec_extracthf" > > switch (which_alternative) > > { > > case 0: > > - return "vpextrw\t{%2, %1, %k0|%k0, %1, %2}"; > > + return "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"; > > case 1: > > - return "vpextrw\t{%2, %1, %0|%0, %1, %2}"; > > + return "%vpextrw\t{%2, %1, %0|%0, %1, %2}"; > > > > case 2: > > operands[2] = GEN_INT (INTVAL (operands[2]) * 2); > > @@ -11330,7 +11330,7 @@ (define_insn "*vec_extracthf" > > gcc_unreachable (); > > } > > } > > - [(set_attr "isa" "*,*,noavx,avx") > > + [(set_attr "isa" "*,sse4,noavx,avx") > > (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1") > > (set_attr "prefix" "maybe_evex") > > (set_attr "mode" "TI")]) > > -- > > 2.18.1 > >
On Mon, Nov 29, 2021 at 10:48 AM Hongtao Liu <crazylht@gmail.com> wrote: > > On Mon, Nov 29, 2021 at 3:53 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > On Mon, Nov 29, 2021 at 2:32 AM liuhongt <hongtao.liu@intel.com> wrote: > > > > > > There're several failures reported in [1]: > > > 1. unsupported instruction `pextrw` for "pextrw $0, %xmm31, 16(%rax)" > > > %vpextrw should be used in output templates. > > > 2. ICE in get_attr_memory for movhi_internal since some alternatives > > > are marked as TYPE_SSELOG. > > > Explicitly set memory_attr for those alternatives. > > > > > > Also this patch fixs a typo and some latent bugs which are related to > > > moving HImode from/to sse register w/o TARGET_AVX512FP16. Here are some more fixes: i386: Fix and improve movhi_internal and movhf_internal some more. An (*v,C) alternative can be added to movhi_internal to directly load HImode constant 0 to xmm register. Also, V4SFmode moves can be used for xmm->xmm moves instead of TImode moves when optimizing for size. Fix invalid %vpinsrw insn template, which needs to duplicate %xmm register for AVX targets. Optimize GPR moves in movhf_internal in the same way as in movhi_internal. Fix pinsrw and pextrw templates for AVX targets. Use sselog1 instead of sselog type. Also, handle TARGET_SSE_PARTIAL_REG_DEPENDENCY and TARGET_SSE_SPLIT_REGS targets. 2021-11-29 Uroš Bizjak <ubizjak@gmail.com> gcc/ChangeLog: PR target/102811 * config/i386/i386.md (*movhi_internal): Introduce (*v,C) alternative. Do not allocate non-GPR registers. Optimize xmm->xmm moves when optimizing for size. Fix vpinsrw insn template. (*movhf_internal): Fix pinsrw and pextrw insn templates for AVX targets. Use sselog1 type instead of sselog. Optimize GPR moves. Optimize xmm->xmm moves for TARGET_SSE_PARTIAL_REG_DEPENDENCY and TARGET_SSE_SPLIT_REGS targets. Bootstrapped and regression tested on x86_64-linux-gnu {,-m32} w/ and w/o -mf16c. Pushed to master. Uros. diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index a384dae23e2..c88374c9d2b 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2495,11 +2495,12 @@ (symbol_ref "true")))]) (define_insn "*movhi_internal" - [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,*k,*k ,*r,*m,*k,?r,?v,*v,*v,*m") - (match_operand:HI 1 "general_operand" "r ,rn,rm,rn,*r,*km,*k,*k,CBC,v, r, v, m, v"))] + [(set (match_operand:HI 0 "nonimmediate_operand" + "=r,r,r,m ,*k,*k ,r ,m ,*k ,?r,?*v,*v,*v,*v,m") + (match_operand:HI 1 "general_operand" + "r ,n,m,rn,r ,*km,*k,*k,CBC,*v,r ,C ,*v,m ,*v"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && ix86_hardreg_mov_ok (operands[0], operands[1])" - { switch (get_attr_type (insn)) { @@ -2526,10 +2527,13 @@ return ix86_output_ssemov (insn, operands); case TYPE_SSELOG1: + if (satisfies_constraint_C (operands[1])) + return standard_sse_constant_opcode (insn, operands); + if (SSE_REG_P (operands[0])) return MEM_P (operands[1]) - ? "%vpinsrw\t{$0, %1, %0|%0, %1, 0}" - : "%vpinsrw\t{$0, %k1, %0|%0, %k1, 0}"; + ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}" + : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}"; else return MEM_P (operands[0]) ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}" @@ -2550,23 +2554,25 @@ } } [(set (attr "isa") - (cond [(eq_attr "alternative" "9,10,11,12") + (cond [(eq_attr "alternative" "9,10,11,12,13") (const_string "sse2") - (eq_attr "alternative" "13") + (eq_attr "alternative" "14") (const_string "sse4") ] (const_string "*"))) (set (attr "type") - (cond [(eq_attr "alternative" "9,10,12,13") + (cond [(eq_attr "alternative" "4,5,6,7") + (const_string "mskmov") + (eq_attr "alternative" "8") + (const_string "msklog") + (eq_attr "alternative" "9,10,13,14") (if_then_else (match_test "TARGET_AVX512FP16") (const_string "ssemov") (const_string "sselog1")) - (eq_attr "alternative" "4,5,6,7") - (const_string "mskmov") (eq_attr "alternative" "11") + (const_string "sselog1") + (eq_attr "alternative" "12") (const_string "ssemov") - (eq_attr "alternative" "8") - (const_string "msklog") (match_test "optimize_function_for_size_p (cfun)") (const_string "imov") (and (eq_attr "alternative" "0") @@ -2581,33 +2587,54 @@ (const_string "imovx") ] (const_string "imov"))) - (set (attr "prefix") - (cond [(eq_attr "alternative" "9,10,11,12,13") - (const_string "maybe_evex") - (eq_attr "alternative" "4,5,6,7,8") - (const_string "vex") - ] - (const_string "orig"))) - (set (attr "mode") - (cond [(eq_attr "type" "imovx") - (const_string "SI") - (eq_attr "alternative" "9,10,12,13") - (if_then_else (match_test "TARGET_AVX512FP16") - (const_string "HI") - (const_string "TI")) - (eq_attr "alternative" "11") - (if_then_else (match_test "TARGET_AVX512FP16") - (const_string "HF") - (const_string "SF")) - (and (eq_attr "alternative" "1,2") - (match_operand:HI 1 "aligned_operand")) - (const_string "SI") - (and (eq_attr "alternative" "0") - (ior (not (match_test "TARGET_PARTIAL_REG_STALL")) - (not (match_test "TARGET_HIMODE_MATH")))) - (const_string "SI") + (set (attr "prefix") + (cond [(eq_attr "alternative" "4,5,6,7,8") + (const_string "vex") + (eq_attr "alternative" "9,10,11,12,13,14") + (const_string "maybe_evex") + ] + (const_string "orig"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "9,10,13,14") + (if_then_else (match_test "TARGET_AVX512FP16") + (const_string "HI") + (const_string "TI")) + (eq_attr "alternative" "11") + (cond [(match_test "TARGET_AVX") + (const_string "TI") + (ior (not (match_test "TARGET_SSE2")) + (match_test "optimize_function_for_size_p (cfun)")) + (const_string "V4SF") + ] + (const_string "TI")) + (eq_attr "alternative" "12") + (cond [(match_test "TARGET_AVX512FP16") + (const_string "HI") + (match_test "TARGET_AVX") + (const_string "TI") + (ior (not (match_test "TARGET_SSE2")) + (match_test "optimize_function_for_size_p (cfun)")) + (const_string "V4SF") + ] + (const_string "TI")) + (eq_attr "type" "imovx") + (const_string "SI") + (and (eq_attr "alternative" "1,2") + (match_operand:HI 1 "aligned_operand")) + (const_string "SI") + (and (eq_attr "alternative" "0") + (ior (not (match_test "TARGET_PARTIAL_REG_STALL")) + (not (match_test "TARGET_HIMODE_MATH")))) + (const_string "SI") ] - (const_string "HI")))]) + (const_string "HI"))) + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "9") + (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC") + (eq_attr "alternative" "10") + (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC") + ] + (symbol_ref "true")))]) ;; Situation is quite tricky about when to choose full sized (SImode) move ;; over QImode moves. For Q_REG -> Q_REG move we use full size only for @@ -3774,92 +3801,110 @@ (define_insn "*movhf_internal" [(set (match_operand:HF 0 "nonimmediate_operand" - "=?r,?m,v,v,?r,m,?v,v") + "=?r,?r,?r,?m,v,v,?r,m,?v,v") (match_operand:HF 1 "general_operand" - "rmF,rF,C,v, v,v, r,m"))] + "r ,F ,m ,rF,C,v, v,v,r ,m"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (lra_in_progress || reload_completed || !CONST_DOUBLE_P (operands[1]) - || (TARGET_SSE && TARGET_SSE_MATH + || (TARGET_SSE2 && standard_sse_constant_p (operands[1], HFmode) == 1) || memory_operand (operands[0], HFmode))" { switch (get_attr_type (insn)) { - case TYPE_IMOV: - return "mov{w}\t{%1, %0|%0, %1}"; - - case TYPE_SSELOG1: - return standard_sse_constant_opcode (insn, operands); + case TYPE_IMOVX: + /* movzwl is faster than movw on p2 due to partial word stalls, + though not as fast as an aligned movl. */ + return "movz{wl|x}\t{%1, %k0|%k0, %1}"; case TYPE_SSEMOV: return ix86_output_ssemov (insn, operands); - case TYPE_SSELOG: + case TYPE_SSELOG1: + if (satisfies_constraint_C (operands[1])) + return standard_sse_constant_opcode (insn, operands); + if (SSE_REG_P (operands[0])) return MEM_P (operands[1]) - ? "pinsrw\t{$0, %1, %0|%0, %1, 0}" - : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}"; + ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}" + : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}"; else return MEM_P (operands[0]) - ? "pextrw\t{$0, %1, %0|%0, %1, 0}" - : "pextrw\t{$0, %1, %k0|%k0, %1, 0}"; + ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}" + : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}"; default: - gcc_unreachable (); + if (get_attr_mode (insn) == MODE_SI) + return "mov{l}\t{%k1, %k0|%k0, %k1}"; + else + return "mov{w}\t{%1, %0|%0, %1}"; } } [(set (attr "isa") - (cond [(eq_attr "alternative" "2,3,4,6,7") + (cond [(eq_attr "alternative" "4,5,6,8,9") (const_string "sse2") - (eq_attr "alternative" "5") + (eq_attr "alternative" "7") (const_string "sse4") ] (const_string "*"))) (set (attr "type") - (cond [(eq_attr "alternative" "0,1") - (const_string "imov") - (eq_attr "alternative" "2") + (cond [(eq_attr "alternative" "4") (const_string "sselog1") - (eq_attr "alternative" "4,5,6,7") + (eq_attr "alternative" "5") + (const_string "ssemov") + (eq_attr "alternative" "6,7,8,9") (if_then_else (match_test ("TARGET_AVX512FP16")) (const_string "ssemov") - (const_string "sselog")) - ] - (const_string "ssemov"))) - (set (attr "memory") - (cond [(eq_attr "alternative" "4,6") - (const_string "none") - (eq_attr "alternative" "5") - (const_string "store") - (eq_attr "alternative" "7") - (const_string "load") - ] - (const_string "*"))) + (const_string "sselog1")) + (match_test "optimize_function_for_size_p (cfun)") + (const_string "imov") + (and (eq_attr "alternative" "0") + (ior (not (match_test "TARGET_PARTIAL_REG_STALL")) + (not (match_test "TARGET_HIMODE_MATH")))) + (const_string "imov") + (and (eq_attr "alternative" "1,2") + (match_operand:HI 1 "aligned_operand")) + (const_string "imov") + (and (match_test "TARGET_MOVX") + (eq_attr "alternative" "0,2")) + (const_string "imovx") + ] + (const_string "imov"))) (set (attr "prefix") - (cond [(eq_attr "alternative" "0,1") - (const_string "orig") + (cond [(eq_attr "alternative" "4,5,6,7,8,9") + (const_string "maybe_vex") ] - (const_string "maybe_vex"))) + (const_string "orig"))) (set (attr "mode") - (cond [(eq_attr "alternative" "0,1") - (const_string "HI") - (eq_attr "alternative" "2") + (cond [(eq_attr "alternative" "4") (const_string "V4SF") - (eq_attr "alternative" "4,5,6,7") + (eq_attr "alternative" "6,7,8,9") (if_then_else (match_test "TARGET_AVX512FP16") (const_string "HI") (const_string "TI")) - (eq_attr "alternative" "3") - (if_then_else - (match_test "TARGET_AVX512FP16") - (const_string "HF") - (const_string "SF")) + (eq_attr "alternative" "5") + (cond [(match_test "TARGET_AVX512FP16") + (const_string "HF") + (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (match_test "TARGET_SSE_SPLIT_REGS")) + (const_string "V4SF") + ] + (const_string "SF")) + (eq_attr "type" "imovx") + (const_string "SI") + (and (eq_attr "alternative" "1,2") + (match_operand:HI 1 "aligned_operand")) + (const_string "SI") + (and (eq_attr "alternative" "0") + (ior (not (match_test "TARGET_PARTIAL_REG_STALL")) + (not (match_test "TARGET_HIMODE_MATH")))) + (const_string "SI") ] - (const_string "*")))]) + (const_string "HI")))]) (define_split [(set (match_operand 0 "any_fp_register_operand")
On Tue, Nov 30, 2021 at 5:21 AM Uros Bizjak <ubizjak@gmail.com> wrote: > > On Mon, Nov 29, 2021 at 10:48 AM Hongtao Liu <crazylht@gmail.com> wrote: > > > > On Mon, Nov 29, 2021 at 3:53 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > > > On Mon, Nov 29, 2021 at 2:32 AM liuhongt <hongtao.liu@intel.com> wrote: > > > > > > > > There're several failures reported in [1]: > > > > 1. unsupported instruction `pextrw` for "pextrw $0, %xmm31, 16(%rax)" > > > > %vpextrw should be used in output templates. > > > > 2. ICE in get_attr_memory for movhi_internal since some alternatives > > > > are marked as TYPE_SSELOG. > > > > Explicitly set memory_attr for those alternatives. > > > > > > > > Also this patch fixs a typo and some latent bugs which are related to > > > > moving HImode from/to sse register w/o TARGET_AVX512FP16. > > Here are some more fixes: > thanks. > i386: Fix and improve movhi_internal and movhf_internal some more. > > An (*v,C) alternative can be added to movhi_internal to directly load > HImode constant 0 to xmm register. Also, V4SFmode moves can be used > for xmm->xmm moves instead of TImode moves when optimizing for size. > Fix invalid %vpinsrw insn template, which needs to duplicate %xmm > register for AVX targets. > > Optimize GPR moves in movhf_internal in the same way as in movhi_internal. > Fix pinsrw and pextrw templates for AVX targets. Use sselog1 > instead of sselog type. Also, handle TARGET_SSE_PARTIAL_REG_DEPENDENCY > and TARGET_SSE_SPLIT_REGS targets. > > 2021-11-29 Uroš Bizjak <ubizjak@gmail.com> > > gcc/ChangeLog: > > PR target/102811 > * config/i386/i386.md (*movhi_internal): Introduce (*v,C) alternative. > Do not allocate non-GPR registers. Optimize xmm->xmm moves when > optimizing for size. Fix vpinsrw insn template. > (*movhf_internal): Fix pinsrw and pextrw insn templates for > AVX targets. Use sselog1 type instead of sselog. Optimize GPR moves. > Optimize xmm->xmm moves for TARGET_SSE_PARTIAL_REG_DEPENDENCY > and TARGET_SSE_SPLIT_REGS targets. > > Bootstrapped and regression tested on x86_64-linux-gnu {,-m32} w/ and > w/o -mf16c. > > Pushed to master. > > Uros.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 3dedf522c42..7cf599f57f7 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -19277,7 +19277,7 @@ ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass, } /* Require movement to gpr, and then store to memory. */ - if (mode == HFmode + if ((mode == HFmode || mode == HImode) && !TARGET_SSE4_1 && SSE_CLASS_P (rclass) && !in_p && MEM_P (x)) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 68606e57e60..2cb3e727588 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2528,12 +2528,12 @@ (define_insn "*movhi_internal" case TYPE_SSELOG: if (SSE_REG_P (operands[0])) return MEM_P (operands[1]) - ? "pinsrw\t{$0, %1, %0|%0, %1, 0}" - : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}"; + ? "%vpinsrw\t{$0, %1, %0|%0, %1, 0}" + : "%vpinsrw\t{$0, %k1, %0|%0, %k1, 0}"; else - return MEM_P (operands[1]) - ? "pextrw\t{$0, %1, %0|%0, %1, 0}" - : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}"; + return MEM_P (operands[0]) + ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}" + : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}"; case TYPE_MSKLOG: if (operands[1] == const0_rtx) @@ -2557,12 +2557,14 @@ (define_insn "*movhi_internal" ] (const_string "*"))) (set (attr "type") - (cond [(eq_attr "alternative" "9,10,11,12,13") + (cond [(eq_attr "alternative" "9,10,12,13") (if_then_else (match_test "TARGET_AVX512FP16") (const_string "ssemov") (const_string "sselog")) (eq_attr "alternative" "4,5,6,7") (const_string "mskmov") + (eq_attr "alternative" "11") + (const_string "ssemov") (eq_attr "alternative" "8") (const_string "msklog") (match_test "optimize_function_for_size_p (cfun)") @@ -2579,15 +2581,33 @@ (define_insn "*movhi_internal" (const_string "imovx") ] (const_string "imov"))) + (set (attr "memory") + (cond [(eq_attr "alternative" "9,10") + (const_string "none") + (eq_attr "alternative" "12") + (const_string "load") + (eq_attr "alternative" "13") + (const_string "store") + ] + (const_string "*"))) (set (attr "prefix") - (if_then_else (eq_attr "alternative" "4,5,6,7,8") - (const_string "vex") - (const_string "orig"))) + (cond [(eq_attr "alternative" "9,10,11,12,13") + (const_string "maybe_evex") + (eq_attr "alternative" "4,5,6,7,8") + (const_string "vex") + ] + (const_string "orig"))) (set (attr "mode") (cond [(eq_attr "type" "imovx") (const_string "SI") + (eq_attr "alternative" "9,10,12,13") + (if_then_else (match_test "TARGET_AVX512FP16") + (const_string "HI") + (const_string "TI")) (eq_attr "alternative" "11") - (const_string "HF") + (if_then_else (match_test "TARGET_AVX512FP16") + (const_string "HF") + (const_string "SF")) (and (eq_attr "alternative" "1,2") (match_operand:HI 1 "aligned_operand")) (const_string "SI") @@ -3791,9 +3811,9 @@ (define_insn "*movhf_internal" ? "pinsrw\t{$0, %1, %0|%0, %1, 0}" : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}"; else - return MEM_P (operands[1]) + return MEM_P (operands[0]) ? "pextrw\t{$0, %1, %0|%0, %1, 0}" - : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}"; + : "pextrw\t{$0, %1, %k0|%k0, %1, 0}"; default: gcc_unreachable (); diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b109c2aa8fa..5229b23af98 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -11315,9 +11315,9 @@ (define_insn "*vec_extracthf" switch (which_alternative) { case 0: - return "vpextrw\t{%2, %1, %k0|%k0, %1, %2}"; + return "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"; case 1: - return "vpextrw\t{%2, %1, %0|%0, %1, %2}"; + return "%vpextrw\t{%2, %1, %0|%0, %1, %2}"; case 2: operands[2] = GEN_INT (INTVAL (operands[2]) * 2); @@ -11330,7 +11330,7 @@ (define_insn "*vec_extracthf" gcc_unreachable (); } } - [(set_attr "isa" "*,*,noavx,avx") + [(set_attr "isa" "*,sse4,noavx,avx") (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1") (set_attr "prefix" "maybe_evex") (set_attr "mode" "TI")])