Message ID | 20240628052719.330451-3-hongtao.liu@intel.com |
---|---|
State | New |
Headers | show |
Series | Enable pass_late_combine for x86. | expand |
On Fri, Jun 28, 2024 at 7:29 AM liuhongt <hongtao.liu@intel.com> wrote: > > late_combine will combine lshiftrt + zero_extend into *lshrsi3_1_zext, which > causes an extra mov between gpr and kmask; add ?k to the pattern. > > gcc/ChangeLog: > > PR target/115610 > * config/i386/i386.md (*<insn>si3_1_zext): Add alternative ?k, > enable it only for lshiftrt and under avx512bw. > * config/i386/sse.md (*klshrsi3_1_zext): New define_insn, and > add corresponding define_split after it. OK. Thanks, Uros. > --- > gcc/config/i386/i386.md | 19 +++++++++++++------ > gcc/config/i386/sse.md | 28 ++++++++++++++++++++++++++++ > 2 files changed, 41 insertions(+), 6 deletions(-) > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index fd48e764469..57a10c1af48 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -16836,10 +16836,10 @@ (define_insn "*bmi2_<insn>si3_1_zext" > (set_attr "mode" "SI")]) > > (define_insn "*<insn>si3_1_zext" > - [(set (match_operand:DI 0 "register_operand" "=r,r,r") > + [(set (match_operand:DI 0 "register_operand" "=r,r,r,?k") > (zero_extend:DI > - (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,rm") > - (match_operand:QI 2 "nonmemory_operand" "cI,r,cI")))) > + (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,rm,k") > + (match_operand:QI 2 "nonmemory_operand" "cI,r,cI,I")))) > (clobber (reg:CC FLAGS_REG))] > "TARGET_64BIT > && ix86_binary_operator_ok (<CODE>, SImode, operands, TARGET_APX_NDD)" > @@ -16850,6 +16850,8 @@ (define_insn "*<insn>si3_1_zext" > case TYPE_ISHIFTX: > return "#"; > > + case TYPE_MSKLOG: > + return "#"; > default: > if (operands[2] == const1_rtx > && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) > @@ -16860,8 +16862,8 @@ (define_insn "*<insn>si3_1_zext" > : "<shift>{l}\t{%2, %k0|%k0, %2}"; > } > } > - [(set_attr "isa" "*,bmi2,apx_ndd") > - (set_attr "type" "ishift,ishiftx,ishift") > + [(set_attr "isa" "*,bmi2,apx_ndd,avx512bw") > + (set_attr "type" 
"ishift,ishiftx,ishift,msklog") > (set (attr "length_immediate") > (if_then_else > (and (match_operand 2 "const1_operand") > @@ -16869,7 +16871,12 @@ (define_insn "*<insn>si3_1_zext" > (match_test "optimize_function_for_size_p (cfun)"))) > (const_string "0") > (const_string "*"))) > - (set_attr "mode" "SI")]) > + (set_attr "mode" "SI") > + (set (attr "enabled") > + (if_then_else > + (eq_attr "alternative" "3") > + (symbol_ref "<CODE> == LSHIFTRT && TARGET_AVX512BW") > + (const_string "*")))]) > > ;; Convert shift to the shiftx pattern to avoid flags dependency. > (define_split > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index 0be2dcd8891..20665a6f097 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -2179,6 +2179,34 @@ (define_split > (match_dup 2))) > (unspec [(const_int 0)] UNSPEC_MASKOP)])]) > > +(define_insn "*klshrsi3_1_zext" > + [(set (match_operand:DI 0 "register_operand" "=k") > + (zero_extend:DI > + (lshiftrt:SI (match_operand:SI 1 "register_operand" "k") > + (match_operand 2 "const_0_to_31_operand" "I")))) > + (unspec [(const_int 0)] UNSPEC_MASKOP)] > + "TARGET_AVX512BW" > + "kshiftrd\t{%2, %1, %0|%0, %1, %2}" > + [(set_attr "type" "msklog") > + (set_attr "prefix" "vex") > + (set_attr "mode" "SI")]) > + > +(define_split > + [(set (match_operand:DI 0 "mask_reg_operand") > + (zero_extend:DI > + (lshiftrt:SI > + (match_operand:SI 1 "mask_reg_operand") > + (match_operand 2 "const_0_to_31_operand")))) > + (clobber (reg:CC FLAGS_REG))] > + "TARGET_AVX512BW && reload_completed" > + [(parallel > + [(set (match_dup 0) > + (zero_extend:DI > + (lshiftrt:SI > + (match_dup 1) > + (match_dup 2)))) > + (unspec [(const_int 0)] UNSPEC_MASKOP)])]) > + > (define_insn "ktest<mode>" > [(set (reg:CC FLAGS_REG) > (unspec:CC > -- > 2.31.1 >
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index fd48e764469..57a10c1af48 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -16836,10 +16836,10 @@ (define_insn "*bmi2_<insn>si3_1_zext" (set_attr "mode" "SI")]) (define_insn "*<insn>si3_1_zext" - [(set (match_operand:DI 0 "register_operand" "=r,r,r") + [(set (match_operand:DI 0 "register_operand" "=r,r,r,?k") (zero_extend:DI - (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,rm") - (match_operand:QI 2 "nonmemory_operand" "cI,r,cI")))) + (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,rm,k") + (match_operand:QI 2 "nonmemory_operand" "cI,r,cI,I")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands, TARGET_APX_NDD)" @@ -16850,6 +16850,8 @@ (define_insn "*<insn>si3_1_zext" case TYPE_ISHIFTX: return "#"; + case TYPE_MSKLOG: + return "#"; default: if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) @@ -16860,8 +16862,8 @@ (define_insn "*<insn>si3_1_zext" : "<shift>{l}\t{%2, %k0|%k0, %2}"; } } - [(set_attr "isa" "*,bmi2,apx_ndd") - (set_attr "type" "ishift,ishiftx,ishift") + [(set_attr "isa" "*,bmi2,apx_ndd,avx512bw") + (set_attr "type" "ishift,ishiftx,ishift,msklog") (set (attr "length_immediate") (if_then_else (and (match_operand 2 "const1_operand") @@ -16869,7 +16871,12 @@ (define_insn "*<insn>si3_1_zext" (match_test "optimize_function_for_size_p (cfun)"))) (const_string "0") (const_string "*"))) - (set_attr "mode" "SI")]) + (set_attr "mode" "SI") + (set (attr "enabled") + (if_then_else + (eq_attr "alternative" "3") + (symbol_ref "<CODE> == LSHIFTRT && TARGET_AVX512BW") + (const_string "*")))]) ;; Convert shift to the shiftx pattern to avoid flags dependency. 
(define_split diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 0be2dcd8891..20665a6f097 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -2179,6 +2179,34 @@ (define_split (match_dup 2))) (unspec [(const_int 0)] UNSPEC_MASKOP)])]) +(define_insn "*klshrsi3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=k") + (zero_extend:DI + (lshiftrt:SI (match_operand:SI 1 "register_operand" "k") + (match_operand 2 "const_0_to_31_operand" "I")))) + (unspec [(const_int 0)] UNSPEC_MASKOP)] + "TARGET_AVX512BW" + "kshiftrd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "msklog") + (set_attr "prefix" "vex") + (set_attr "mode" "SI")]) + +(define_split + [(set (match_operand:DI 0 "mask_reg_operand") + (zero_extend:DI + (lshiftrt:SI + (match_operand:SI 1 "mask_reg_operand") + (match_operand 2 "const_0_to_31_operand")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_AVX512BW && reload_completed" + [(parallel + [(set (match_dup 0) + (zero_extend:DI + (lshiftrt:SI + (match_dup 1) + (match_dup 2)))) + (unspec [(const_int 0)] UNSPEC_MASKOP)])]) + (define_insn "ktest<mode>" [(set (reg:CC FLAGS_REG) (unspec:CC