Message ID | 20240710064534.3639636-1-hongyu.wang@intel.com |
---|---|
State | New |
Headers | show |
Series | [APX,NF] Add a pass to convert legacy insn to NF insns | expand |
On Wed, Jul 10, 2024 at 2:46 PM Hongyu Wang <hongyu.wang@intel.com> wrote: > > Hi, > > For APX ccmp, the current infrastructure will always generate a cstore for > the ccmp flag user, like > > cmpe %rcx, %r8 > ccmpnel %rax, %rbx > seta %dil > add %rcx, %r9 > add %r9, %rdx > testb %dil, %dil > je .L2 > > In such a case, the legacy add clobbers FLAGS_REG, so there must be an > extra cstore to keep the flag from being reset before it is used. If the > instructions between flag producer and user are NF insns, the setcc/ > test sequence is not required. > > Add a pass to convert legacy flag clobber insns to their NF counterparts. > The conversion only happens when > 1. APX_NF is enabled. > 2. Within a BB, a cstore is found, and there are insns between that cstore > and the next explicit set insn to FLAGS_REG (test or cmp). > 3. All the insns in between have an NF counterpart. > > The pass is added after rtl-ifcvt, which eliminates some branches when > profitable and can thereby cause flag-clobbering insns to be placed between > the cstore and the jcc. > > Bootstrapped & regtested on x86_64-pc-linux-gnu and SDE. Also passed > spec2017 simulation run on SDE. > > Ok for trunk? Ok. > > gcc/ChangeLog: > > * config/i386/i386.md (has_nf): New define_attr, add to all > nf related patterns. > * config/i386/i386-features.cc (ix86_apx_nf_convert): New function > to convert Non-NF insns to their NF counterparts. > (class pass_apx_nf_convert): New pass class. > (make_pass_apx_nf_convert): New. > * config/i386/i386-passes.def: Add pass_apx_nf_convert after > rtl_ifcvt. > * config/i386/i386-protos.h (make_pass_apx_nf_convert): Declare. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/apx-nf-2.c: New test. 
> --- > gcc/config/i386/i386-features.cc | 163 +++++++++++++++++++++++ > gcc/config/i386/i386-passes.def | 1 + > gcc/config/i386/i386-protos.h | 1 + > gcc/config/i386/i386.md | 67 +++++++++- > gcc/testsuite/gcc.target/i386/apx-nf-2.c | 32 +++++ > 5 files changed, 259 insertions(+), 5 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/apx-nf-2.c > > diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc > index fc224ed06b0..3da56ddbdcc 100644 > --- a/gcc/config/i386/i386-features.cc > +++ b/gcc/config/i386/i386-features.cc > @@ -3259,6 +3259,169 @@ make_pass_remove_partial_avx_dependency (gcc::context *ctxt) > return new pass_remove_partial_avx_dependency (ctxt); > } > > +/* Convert legacy instructions that clobbers EFLAGS to APX_NF > + instructions when there are no flag set between a flag > + producer and user. */ > + > +static unsigned int > +ix86_apx_nf_convert (void) > +{ > + timevar_push (TV_MACH_DEP); > + > + basic_block bb; > + rtx_insn *insn; > + hash_map <rtx_insn *, rtx> converting_map; > + auto_vec <rtx_insn *> current_convert_list; > + > + bool converting_seq = false; > + rtx cc = gen_rtx_REG (CCmode, FLAGS_REG); > + > + FOR_EACH_BB_FN (bb, cfun) > + { > + /* Reset conversion for each bb. */ > + converting_seq = false; > + FOR_BB_INSNS (bb, insn) > + { > + if (!NONDEBUG_INSN_P (insn)) > + continue; > + > + if (recog_memoized (insn) < 0) > + continue; > + > + /* Convert candidate insns after cstore, which should > + satisify the two conditions: > + 1. Is not flag user or producer, only clobbers > + FLAGS_REG. > + 2. Have corresponding nf pattern. */ > + > + rtx pat = PATTERN (insn); > + > + /* Starting convertion at first cstorecc. 
*/ > + rtx set = NULL_RTX; > + if (!converting_seq > + && (set = single_set (insn)) > + && ix86_comparison_operator (SET_SRC (set), VOIDmode) > + && reg_overlap_mentioned_p (cc, SET_SRC (set)) > + && !reg_overlap_mentioned_p (cc, SET_DEST (set))) > + { > + converting_seq = true; > + current_convert_list.truncate (0); > + } > + /* Terminate at the next explicit flag set. */ > + else if (reg_set_p (cc, pat) > + && GET_CODE (set_of (cc, pat)) != CLOBBER) > + converting_seq = false; > + > + if (!converting_seq) > + continue; > + > + if (get_attr_has_nf (insn) > + && GET_CODE (pat) == PARALLEL) > + { > + /* Record the insn to candidate map. */ > + current_convert_list.safe_push (insn); > + converting_map.put (insn, pat); > + } > + /* If the insn clobbers flags but has no nf_attr, > + revoke all previous candidates. */ > + else if (!get_attr_has_nf (insn) > + && reg_set_p (cc, pat) > + && GET_CODE (set_of (cc, pat)) == CLOBBER) > + { > + for (auto item : current_convert_list) > + converting_map.remove (item); > + converting_seq = false; > + } > + } > + } > + > + if (!converting_map.is_empty ()) > + { > + for (auto iter = converting_map.begin (); > + iter != converting_map.end (); ++iter) > + { > + rtx_insn *replace = (*iter).first; > + rtx pat = (*iter).second; > + int i, n = 0, len = XVECLEN (pat, 0); > + rtx *new_elems = XALLOCAVEC (rtx, len); > + rtx new_pat; > + for (i = 0; i < len; i++) > + { > + rtx temp = XVECEXP (pat, 0, i); > + if (! 
(GET_CODE (temp) == CLOBBER > + && reg_overlap_mentioned_p (cc, > + XEXP (temp, 0)))) > + { > + new_elems[n] = temp; > + n++; > + } > + } > + > + if (n == 1) > + new_pat = new_elems[0]; > + else > + new_pat = > + gen_rtx_PARALLEL (VOIDmode, > + gen_rtvec_v (n, > + new_elems)); > + > + PATTERN (replace) = new_pat; > + INSN_CODE (replace) = -1; > + recog_memoized (replace); > + df_insn_rescan (replace); > + } > + } > + > + timevar_pop (TV_MACH_DEP); > + return 0; > +} > + > + > +namespace { > + > +const pass_data pass_data_apx_nf_convert = > +{ > + RTL_PASS, /* type */ > + "apx_nfcvt", /* name */ > + OPTGROUP_NONE, /* optinfo_flags */ > + TV_MACH_DEP, /* tv_id */ > + 0, /* properties_required */ > + 0, /* properties_provided */ > + 0, /* properties_destroyed */ > + 0, /* todo_flags_start */ > + 0, /* todo_flags_finish */ > +}; > + > +class pass_apx_nf_convert : public rtl_opt_pass > +{ > +public: > + pass_apx_nf_convert (gcc::context *ctxt) > + : rtl_opt_pass (pass_data_apx_nf_convert, ctxt) > + {} > + > + /* opt_pass methods: */ > + bool gate (function *) final override > + { > + return (TARGET_APX_NF > + && optimize > + && optimize_function_for_speed_p (cfun)); > + } > + > + unsigned int execute (function *) final override > + { > + return ix86_apx_nf_convert (); > + } > +}; // class pass_rpad > + > +} // anon namespace > + > +rtl_opt_pass * > +make_pass_apx_nf_convert (gcc::context *ctxt) > +{ > + return new pass_apx_nf_convert (ctxt); > +} > + > + > /* This compares the priority of target features in function DECL1 > and DECL2. It returns positive value if DECL1 is higher priority, > negative value if DECL2 is higher priority and 0 if they are the > diff --git a/gcc/config/i386/i386-passes.def b/gcc/config/i386/i386-passes.def > index 2d29f65da88..99fc8805b22 100644 > --- a/gcc/config/i386/i386-passes.def > +++ b/gcc/config/i386/i386-passes.def > @@ -33,3 +33,4 @@ along with GCC; see the file COPYING3. 
If not see > INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_endbr_and_patchable_area); > > INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_partial_avx_dependency); > + INSERT_PASS_AFTER (pass_rtl_ifcvt, 1, pass_apx_nf_convert); > diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h > index 68f57393c5d..a3629b32a01 100644 > --- a/gcc/config/i386/i386-protos.h > +++ b/gcc/config/i386/i386-protos.h > @@ -423,6 +423,7 @@ extern rtl_opt_pass *make_pass_insert_endbr_and_patchable_area > (gcc::context *); > extern rtl_opt_pass *make_pass_remove_partial_avx_dependency > (gcc::context *); > +extern rtl_opt_pass *make_pass_apx_nf_convert (gcc::context *); > > extern bool ix86_has_no_direct_extern_access; > extern bool ix86_rpad_gate (); > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index 214cb2e239a..0d4ee514e40 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -992,6 +992,9 @@ (define_attr "enabled" "" > (define_attr "preferred_for_size" "" (const_int 1)) > (define_attr "preferred_for_speed" "" (const_int 1)) > > +;; Define attribute to mark the insn has nf variant. > +(define_attr "has_nf" "0,1" (const_string "0")) > + > ;; Describe a user's asm statement. > (define_asm_attributes > [(set_attr "length" "128") > @@ -6565,6 +6568,7 @@ (define_insn "*add<mode>_1<nf_name>" > (and (eq_attr "type" "alu") (match_operand 2 "const128_operand")) > (const_string "1") > (const_string "*"))) > + (set_attr "has_nf" "1") > (set_attr "mode" "<MODE>")]) > > ;; It may seem that nonimmediate operand is proper one for operand 1. 
> @@ -6682,6 +6686,7 @@ (define_insn "*addhi_1<nf_name>" > (and (eq_attr "type" "alu") (match_operand 2 "const128_operand")) > (const_string "1") > (const_string "*"))) > + (set_attr "has_nf" "1") > (set_attr "mode" "HI,HI,HI,SI,HI,HI")]) > > (define_insn "*addqi_1<nf_name>" > @@ -6750,6 +6755,7 @@ (define_insn "*addqi_1<nf_name>" > (and (eq_attr "type" "alu") (match_operand 2 "const128_operand")) > (const_string "1") > (const_string "*"))) > + (set_attr "has_nf" "1") > (set_attr "mode" "QI,QI,QI,SI,SI,SI,QI,QI") > ;; Potential partial reg stall on alternatives 3 and 4. > (set (attr "preferred_for_speed") > @@ -7963,6 +7969,7 @@ (define_insn "*sub<mode>_1<nf_name>" > <nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" > [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd") > (set_attr "type" "alu") > + (set_attr "has_nf" "1") > (set_attr "mode" "<MODE>")]) > > (define_insn "*subsi_1_zext" > @@ -10078,6 +10085,7 @@ (define_insn "*mul<mode>3_1<nf_name>" > (match_test "<MODE>mode == HImode") > (const_string "double") > (const_string "direct"))) > + (set_attr "has_nf" "1") > (set_attr "mode" "<MODE>")]) > > (define_insn "*imulhi<mode>zu<nf_name>" > @@ -10145,6 +10153,7 @@ (define_insn "*mulqi3_1<nf_name>" > (const_string "direct"))) > (set_attr "amdfam10_decode" "direct") > (set_attr "bdver1_decode" "direct") > + (set_attr "has_nf" "1") > (set_attr "mode" "QI")]) > > ;; Multiply with jump on overflow. 
> @@ -11309,6 +11318,7 @@ (define_insn "*<u>divmod<mode>4_noext" > "" > "<sgnprefix>div{<imodesuffix>}\t%3" > [(set_attr "type" "idiv") > + (set_attr "has_nf" "1") > (set_attr "mode" "<MODE>")]) > > (define_insn "*<u>divmodsi4_noext_zext_1" > @@ -11461,6 +11471,7 @@ (define_insn "<u>divmodhiqi3<nf_name>" > && <nf_condition>" > "<nf_prefix><sgnprefix>div{b}\t%2" > [(set_attr "type" "idiv") > + (set_attr "has_nf" "1") > (set_attr "mode" "QI")]) > > ;; We cannot use div/idiv for double division, because it causes > @@ -12006,6 +12017,7 @@ (define_insn "*anddi_1<nf_name>" > (match_operand 1 "ext_QIreg_operand"))) > (const_string "1") > (const_string "*"))) > + (set_attr "has_nf" "1") > (set_attr "mode" "SI,SI,DI,DI,DI,DI,DI,DI,SI,DI")]) > > (define_insn_and_split "*anddi_1_btr" > @@ -12112,6 +12124,7 @@ (define_insn "*and<mode>_1<nf_name>" > (match_operand 1 "ext_QIreg_operand"))) > (const_string "1") > (const_string "*"))) > + (set_attr "has_nf" "1") > (set_attr "mode" "<MODE>,<MODE>,<MODE>,<MODE>,<MODE>,<MODE>,SI,<MODE>")]) > > (define_insn "*andqi_1<nf_name>" > @@ -12129,6 +12142,7 @@ (define_insn "*andqi_1<nf_name>" > #" > [(set_attr "type" "alu,alu,alu,alu,alu,msklog") > (set_attr "isa" "*,*,*,apx_ndd,apx_ndd,*") > + (set_attr "has_nf" "1") > (set (attr "mode") > (cond [(eq_attr "alternative" "2") > (const_string "SI") > @@ -13005,6 +13019,7 @@ (define_insn "*<code><mode>_1<nf_name>" > #" > [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd,<kmov_isa>") > (set_attr "type" "alu,alu, alu, alu, alu, alu, msklog") > + (set_attr "has_nf" "1") > (set_attr "mode" "<MODE>")]) > > (define_insn_and_split "*notxor<mode>_1" > @@ -13165,6 +13180,7 @@ (define_insn "*<code>qi_1<nf_name>" > #" > [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,avx512f") > (set_attr "type" "alu,alu,alu,alu,alu,msklog") > + (set_attr "has_nf" "1") > (set (attr "mode") > (cond [(eq_attr "alternative" "2") > (const_string "SI") > @@ -13731,6 +13747,7 @@ (define_insn "*neg<mode>_1<nf_name>" > 
<nf_prefix>neg{<imodesuffix>}\t{%1, %0|%0, %1}" > [(set_attr "type" "negnot") > (set_attr "isa" "*,apx_ndd") > + (set_attr "has_nf" "1") > (set_attr "mode" "<MODE>")]) > > (define_insn "*negsi_1_zext" > @@ -14744,6 +14761,7 @@ (define_insn "x86_64_shld<nf_name>" > "<nf_prefix>shld{q}\t{%2, %1, %0|%0, %1, %2}" > [(set_attr "type" "ishift") > (set_attr "prefix_0f" "1") > + (set_attr "has_nf" "1") > (set_attr "mode" "DI") > (set_attr "athlon_decode" "vector") > (set_attr "amdfam10_decode" "vector") > @@ -14763,6 +14781,7 @@ (define_insn "x86_64_shld_ndd<nf_name>" > "TARGET_APX_NDD && <nf_condition>" > "<nf_prefix>shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" > [(set_attr "type" "ishift") > + (set_attr "has_nf" "1") > (set_attr "mode" "DI")]) > > (define_insn "x86_64_shld_1<nf_name>" > @@ -14780,6 +14799,7 @@ (define_insn "x86_64_shld_1<nf_name>" > "<nf_prefix>shld{q}\t{%2, %1, %0|%0, %1, %2}" > [(set_attr "type" "ishift") > (set_attr "prefix_0f" "1") > + (set_attr "has_nf" "1") > (set_attr "mode" "DI") > (set_attr "length_immediate" "1") > (set_attr "athlon_decode" "vector") > @@ -14800,6 +14820,7 @@ (define_insn "x86_64_shld_ndd_1<nf_name>" > && <nf_condition>" > "<nf_prefix>shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" > [(set_attr "type" "ishift") > + (set_attr "has_nf" "1") > (set_attr "mode" "DI") > (set_attr "length_immediate" "1")]) > > @@ -14909,7 +14930,8 @@ (define_insn_and_split "*x86_64_shld_shrd_1_nozext" > emit_move_insn (operands[0], tmp); > } > DONE; > -}) > +} > + [(set_attr "has_nf" "1")]) > > (define_insn_and_split "*x86_64_shld_2" > [(set (match_operand:DI 0 "nonimmediate_operand") > @@ -14974,6 +14996,7 @@ (define_insn "x86_shld<nf_name>" > "<nf_prefix>shld{l}\t{%2, %1, %0|%0, %1, %2}" > [(set_attr "type" "ishift") > (set_attr "prefix_0f" "1") > + (set_attr "has_nf" "1") > (set_attr "mode" "SI") > (set_attr "pent_pair" "np") > (set_attr "athlon_decode" "vector") > @@ -14994,6 +15017,7 @@ (define_insn "x86_shld_ndd<nf_name>" > "TARGET_APX_NDD && 
<nf_condition>" > "<nf_prefix>shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" > [(set_attr "type" "ishift") > + (set_attr "has_nf" "1") > (set_attr "mode" "SI")]) > > > @@ -15012,6 +15036,7 @@ (define_insn "x86_shld_1<nf_name>" > [(set_attr "type" "ishift") > (set_attr "prefix_0f" "1") > (set_attr "length_immediate" "1") > + (set_attr "has_nf" "1") > (set_attr "mode" "SI") > (set_attr "pent_pair" "np") > (set_attr "athlon_decode" "vector") > @@ -15033,6 +15058,7 @@ (define_insn "x86_shld_ndd_1<nf_name>" > "<nf_prefix>shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" > [(set_attr "type" "ishift") > (set_attr "length_immediate" "1") > + (set_attr "has_nf" "1") > (set_attr "mode" "SI")]) > > (define_insn_and_split "*x86_shld_shrd_1_nozext_nf" > @@ -15140,7 +15166,8 @@ (define_insn_and_split "*x86_shld_shrd_1_nozext" > emit_move_insn (operands[0], tmp); > } > DONE; > -}) > +} > + [(set_attr "has_nf" "1")]) > > (define_insn_and_split "*x86_shld_2" > [(set (match_operand:SI 0 "nonimmediate_operand") > @@ -15356,6 +15383,7 @@ (define_insn "*ashl<mode>3_1<nf_name>" > (match_test "optimize_function_for_size_p (cfun)"))))) > (const_string "0") > (const_string "*"))) > + (set_attr "has_nf" "1") > (set_attr "mode" "<MODE>")]) > > ;; Convert shift to the shiftx pattern to avoid flags dependency. > @@ -15512,6 +15540,7 @@ (define_insn "*ashlhi3_1<nf_name>" > (match_test "optimize_function_for_size_p (cfun)"))))) > (const_string "0") > (const_string "*"))) > + (set_attr "has_nf" "1") > (set_attr "mode" "HI,SI,HI,HI")]) > > (define_insn "*ashlqi3_1<nf_name>" > @@ -15583,6 +15612,7 @@ (define_insn "*ashlqi3_1<nf_name>" > (match_test "optimize_function_for_size_p (cfun)"))))) > (const_string "0") > (const_string "*"))) > + (set_attr "has_nf" "1") > (set_attr "mode" "QI,SI,SI,QI,QI") > ;; Potential partial reg stall on alternative 1. 
> (set (attr "preferred_for_speed") > @@ -16184,7 +16214,8 @@ (define_insn_and_split "<insn><dwi>3_doubleword_lowpart" > operands[4] = GEN_INT ((<MODE_SIZE> * BITS_PER_UNIT) - INTVAL (operands[2])); > if (!rtx_equal_p (operands[0], operands[1])) > emit_move_insn (operands[0], operands[1]); > -}) > +} > + [(set_attr "has_nf" "1")]) > > (define_insn "x86_64_shrd<nf_name>" > [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") > @@ -16201,6 +16232,7 @@ (define_insn "x86_64_shrd<nf_name>" > "<nf_prefix>shrd{q}\t{%2, %1, %0|%0, %1, %2}" > [(set_attr "type" "ishift") > (set_attr "prefix_0f" "1") > + (set_attr "has_nf" "1") > (set_attr "mode" "DI") > (set_attr "athlon_decode" "vector") > (set_attr "amdfam10_decode" "vector") > @@ -16220,6 +16252,7 @@ (define_insn "x86_64_shrd_ndd<nf_name>" > "TARGET_APX_NDD && <nf_condition>" > "<nf_prefix>shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" > [(set_attr "type" "ishift") > + (set_attr "has_nf" "1") > (set_attr "mode" "DI")]) > > (define_insn "x86_64_shrd_1<nf_name>" > @@ -16238,6 +16271,7 @@ (define_insn "x86_64_shrd_1<nf_name>" > [(set_attr "type" "ishift") > (set_attr "prefix_0f" "1") > (set_attr "length_immediate" "1") > + (set_attr "has_nf" "1") > (set_attr "mode" "DI") > (set_attr "athlon_decode" "vector") > (set_attr "amdfam10_decode" "vector") > @@ -16258,6 +16292,7 @@ (define_insn "x86_64_shrd_ndd_1<nf_name>" > "<nf_prefix>shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" > [(set_attr "type" "ishift") > (set_attr "length_immediate" "1") > + (set_attr "has_nf" "1") > (set_attr "mode" "DI")]) > > (define_insn_and_split "*x86_64_shrd_shld_1_nozext_nf" > @@ -16366,7 +16401,8 @@ (define_insn_and_split "*x86_64_shrd_shld_1_nozext" > emit_move_insn (operands[0], tmp); > } > DONE; > -}) > +} > + [(set_attr "has_nf" "1")]) > > (define_insn_and_split "*x86_64_shrd_2" > [(set (match_operand:DI 0 "nonimmediate_operand") > @@ -16431,6 +16467,7 @@ (define_insn "x86_shrd<nf_name>" > "<nf_prefix>shrd{l}\t{%2, %1, %0|%0, %1, %2}" > [(set_attr 
"type" "ishift") > (set_attr "prefix_0f" "1") > + (set_attr "has_nf" "1") > (set_attr "mode" "SI") > (set_attr "pent_pair" "np") > (set_attr "athlon_decode" "vector") > @@ -16451,6 +16488,7 @@ (define_insn "x86_shrd_ndd<nf_name>" > "TARGET_APX_NDD && <nf_condition>" > "<nf_prefix>shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" > [(set_attr "type" "ishift") > + (set_attr "has_nf" "1") > (set_attr "mode" "SI")]) > > (define_insn "x86_shrd_1<nf_name>" > @@ -16468,6 +16506,7 @@ (define_insn "x86_shrd_1<nf_name>" > [(set_attr "type" "ishift") > (set_attr "prefix_0f" "1") > (set_attr "length_immediate" "1") > + (set_attr "has_nf" "1") > (set_attr "mode" "SI") > (set_attr "pent_pair" "np") > (set_attr "athlon_decode" "vector") > @@ -16489,6 +16528,7 @@ (define_insn "x86_shrd_ndd_1<nf_name>" > "<nf_prefix>shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" > [(set_attr "type" "ishift") > (set_attr "length_immediate" "1") > + (set_attr "has_nf" "1") > (set_attr "mode" "SI")]) > > (define_insn_and_split "*x86_shrd_shld_1_nozext_nf" > @@ -16596,7 +16636,8 @@ (define_insn_and_split "*x86_shrd_shld_1_nozext" > emit_move_insn (operands[0], tmp); > } > DONE; > -}) > +} > + [(set_attr "has_nf" "1")]) > > (define_insn_and_split "*x86_shrd_2" > [(set (match_operand:SI 0 "nonimmediate_operand") > @@ -16668,6 +16709,7 @@ (define_insn "ashr<mode>3_cvt<nf_name>" > (set_attr "prefix_0f" "0,*,*") > (set_attr "length_immediate" "0,*,*") > (set_attr "modrm" "0,1,1") > + (set_attr "has_nf" "1") > (set_attr "mode" "<MODE>")]) > > (define_insn "*ashrsi3_cvt_zext" > @@ -16761,6 +16803,7 @@ (define_insn "*ashr<mode>3_1<nf_name>" > (match_test "optimize_function_for_size_p (cfun)"))) > (const_string "0") > (const_string "*"))) > + (set_attr "has_nf" "1") > (set_attr "mode" "<MODE>")]) > > ;; Specialization of *lshr<mode>3_1 below, extracting the SImode > @@ -16824,6 +16867,7 @@ (define_insn "*lshr<mode>3_1<nf_name>" > (match_test "optimize_function_for_size_p (cfun)"))) > (const_string "0") > (const_string 
"*"))) > + (set_attr "has_nf" "1") > (set_attr "mode" "<MODE>")]) > > ;; Convert shift to the shiftx pattern to avoid flags dependency. > @@ -16939,6 +16983,7 @@ (define_insn "*ashr<mode>3_1<nf_name>" > (match_test "optimize_function_for_size_p (cfun)"))) > (const_string "0") > (const_string "*"))) > + (set_attr "has_nf" "1") > (set_attr "mode" "<MODE>")]) > > (define_insn "*lshrqi3_1<nf_name>" > @@ -16976,6 +17021,7 @@ (define_insn "*lshrqi3_1<nf_name>" > (match_test "optimize_function_for_size_p (cfun)"))) > (const_string "0") > (const_string "*"))) > + (set_attr "has_nf" "1") > (set_attr "mode" "QI")]) > > (define_insn "*lshrhi3_1<nf_name>" > @@ -17013,6 +17059,7 @@ (define_insn "*lshrhi3_1<nf_name>" > (match_test "optimize_function_for_size_p (cfun)"))) > (const_string "0") > (const_string "*"))) > + (set_attr "has_nf" "1") > (set_attr "mode" "HI")]) > > ;; Alternative 1 is needed to work around LRA limitation, see PR82524. > @@ -17562,6 +17609,7 @@ (define_insn "*<insn><mode>3_1<nf_name>" > (match_test "optimize_function_for_size_p (cfun)")))) > (const_string "0") > (const_string "*"))) > + (set_attr "has_nf" "1") > (set_attr "mode" "<MODE>")]) > > ;; Convert rotate to the rotatex pattern to avoid flags dependency. > @@ -17706,6 +17754,7 @@ (define_insn "*<insn><mode>3_1<nf_name>" > (match_test "optimize_function_for_size_p (cfun)"))) > (const_string "0") > (const_string "*"))) > + (set_attr "has_nf" "1") > (set_attr "mode" "<MODE>")]) > > ;; Alternative 1 is needed to work around LRA limitation, see PR82524. 
> @@ -20497,6 +20546,7 @@ (define_insn_and_split "clz<mode>2_lzcnt" > "ix86_expand_clear (operands[0]);" > [(set_attr "prefix_rep" "1") > (set_attr "type" "bitmanip") > + (set_attr "has_nf" "1") > (set_attr "mode" "<MODE>")]) > > ; False dependency happens when destination is only updated by tzcnt, > @@ -20525,6 +20575,7 @@ (define_insn "*clz<mode>2_lzcnt_falsedep" > "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" > [(set_attr "prefix_rep" "1") > (set_attr "type" "bitmanip") > + (set_attr "has_nf" "1") > (set_attr "mode" "<MODE>")]) > > (define_insn_and_split "*clzsi2_lzcnt_zext" > @@ -20658,6 +20709,7 @@ (define_insn_and_split "<lt_zcnt>_<mode>" > [(set_attr "type" "<lt_zcnt_type>") > (set_attr "prefix_0f" "1") > (set_attr "prefix_rep" "1") > + (set_attr "has_nf" "1") > (set_attr "mode" "<MODE>")]) > > ; False dependency happens when destination is only updated by tzcnt, > @@ -20688,6 +20740,7 @@ (define_insn "*<lt_zcnt>_<mode>_falsedep" > [(set_attr "type" "<lt_zcnt_type>") > (set_attr "prefix_0f" "1") > (set_attr "prefix_rep" "1") > + (set_attr "has_nf" "1") > (set_attr "mode" "<MODE>")]) > > (define_insn "<lt_zcnt>_hi<nf_name>" > @@ -20699,6 +20752,7 @@ (define_insn "<lt_zcnt>_hi<nf_name>" > [(set_attr "type" "<lt_zcnt_type>") > (set_attr "prefix_0f" "1") > (set_attr "prefix_rep" "1") > + (set_attr "has_nf" "1") > (set_attr "mode" "HI")]) > > ;; BMI instructions. 
> @@ -21161,6 +21215,7 @@ (define_insn_and_split "popcount<mode>2" > "ix86_expand_clear (operands[0]);" > [(set_attr "prefix_rep" "1") > (set_attr "type" "bitmanip") > + (set_attr "has_nf" "1") > (set_attr "mode" "<MODE>")]) > > ; False dependency happens when destination is only updated by tzcnt, > @@ -21201,6 +21256,7 @@ (define_insn "*popcount<mode>2_falsedep" > } > [(set_attr "prefix_rep" "1") > (set_attr "type" "bitmanip") > + (set_attr "has_nf" "1") > (set_attr "mode" "<MODE>")]) > > (define_insn_and_split "*popcountsi2_zext" > @@ -21355,6 +21411,7 @@ (define_insn "popcounthi2<nf_name>" > } > [(set_attr "prefix_rep" "1") > (set_attr "type" "bitmanip") > + (set_attr "has_nf" "1") > (set_attr "mode" "HI")]) > > (define_expand "bswapdi2" > diff --git a/gcc/testsuite/gcc.target/i386/apx-nf-2.c b/gcc/testsuite/gcc.target/i386/apx-nf-2.c > new file mode 100644 > index 00000000000..8f5b8f6b2b7 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/apx-nf-2.c > @@ -0,0 +1,32 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-O3 -mapxf" } */ > + > +int foo(int a, int b, int c, int d) > +{ > + int sum = a; > + > + if (a != c) > + { > + c += d; > + a += b; > + sum += a + c; > + if (b != d && sum < c || sum > d) > + { > + b -= d; > + sum -= b; > + } > + } > + > + return sum; > +} > + > +int foo2 (unsigned a, unsigned b, unsigned d, unsigned e, int *p) > +{ > + unsigned r; > + int c = __builtin_mul_overflow (a, b, &r); > + *p += a; > + return c ? d : e; > +} > + > +/* { dg-final { scan-assembler-not "set" } } */ > + > -- > 2.31.1 >
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index fc224ed06b0..3da56ddbdcc 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -3259,6 +3259,169 @@ make_pass_remove_partial_avx_dependency (gcc::context *ctxt) return new pass_remove_partial_avx_dependency (ctxt); } +/* Convert legacy instructions that clobbers EFLAGS to APX_NF + instructions when there are no flag set between a flag + producer and user. */ + +static unsigned int +ix86_apx_nf_convert (void) +{ + timevar_push (TV_MACH_DEP); + + basic_block bb; + rtx_insn *insn; + hash_map <rtx_insn *, rtx> converting_map; + auto_vec <rtx_insn *> current_convert_list; + + bool converting_seq = false; + rtx cc = gen_rtx_REG (CCmode, FLAGS_REG); + + FOR_EACH_BB_FN (bb, cfun) + { + /* Reset conversion for each bb. */ + converting_seq = false; + FOR_BB_INSNS (bb, insn) + { + if (!NONDEBUG_INSN_P (insn)) + continue; + + if (recog_memoized (insn) < 0) + continue; + + /* Convert candidate insns after cstore, which should + satisify the two conditions: + 1. Is not flag user or producer, only clobbers + FLAGS_REG. + 2. Have corresponding nf pattern. */ + + rtx pat = PATTERN (insn); + + /* Starting convertion at first cstorecc. */ + rtx set = NULL_RTX; + if (!converting_seq + && (set = single_set (insn)) + && ix86_comparison_operator (SET_SRC (set), VOIDmode) + && reg_overlap_mentioned_p (cc, SET_SRC (set)) + && !reg_overlap_mentioned_p (cc, SET_DEST (set))) + { + converting_seq = true; + current_convert_list.truncate (0); + } + /* Terminate at the next explicit flag set. */ + else if (reg_set_p (cc, pat) + && GET_CODE (set_of (cc, pat)) != CLOBBER) + converting_seq = false; + + if (!converting_seq) + continue; + + if (get_attr_has_nf (insn) + && GET_CODE (pat) == PARALLEL) + { + /* Record the insn to candidate map. 
*/ + current_convert_list.safe_push (insn); + converting_map.put (insn, pat); + } + /* If the insn clobbers flags but has no nf_attr, + revoke all previous candidates. */ + else if (!get_attr_has_nf (insn) + && reg_set_p (cc, pat) + && GET_CODE (set_of (cc, pat)) == CLOBBER) + { + for (auto item : current_convert_list) + converting_map.remove (item); + converting_seq = false; + } + } + } + + if (!converting_map.is_empty ()) + { + for (auto iter = converting_map.begin (); + iter != converting_map.end (); ++iter) + { + rtx_insn *replace = (*iter).first; + rtx pat = (*iter).second; + int i, n = 0, len = XVECLEN (pat, 0); + rtx *new_elems = XALLOCAVEC (rtx, len); + rtx new_pat; + for (i = 0; i < len; i++) + { + rtx temp = XVECEXP (pat, 0, i); + if (! (GET_CODE (temp) == CLOBBER + && reg_overlap_mentioned_p (cc, + XEXP (temp, 0)))) + { + new_elems[n] = temp; + n++; + } + } + + if (n == 1) + new_pat = new_elems[0]; + else + new_pat = + gen_rtx_PARALLEL (VOIDmode, + gen_rtvec_v (n, + new_elems)); + + PATTERN (replace) = new_pat; + INSN_CODE (replace) = -1; + recog_memoized (replace); + df_insn_rescan (replace); + } + } + + timevar_pop (TV_MACH_DEP); + return 0; +} + + +namespace { + +const pass_data pass_data_apx_nf_convert = +{ + RTL_PASS, /* type */ + "apx_nfcvt", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_MACH_DEP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_apx_nf_convert : public rtl_opt_pass +{ +public: + pass_apx_nf_convert (gcc::context *ctxt) + : rtl_opt_pass (pass_data_apx_nf_convert, ctxt) + {} + + /* opt_pass methods: */ + bool gate (function *) final override + { + return (TARGET_APX_NF + && optimize + && optimize_function_for_speed_p (cfun)); + } + + unsigned int execute (function *) final override + { + return ix86_apx_nf_convert (); + } +}; // class pass_rpad + +} // anon namespace + +rtl_opt_pass * 
+make_pass_apx_nf_convert (gcc::context *ctxt) +{ + return new pass_apx_nf_convert (ctxt); +} + + /* This compares the priority of target features in function DECL1 and DECL2. It returns positive value if DECL1 is higher priority, negative value if DECL2 is higher priority and 0 if they are the diff --git a/gcc/config/i386/i386-passes.def b/gcc/config/i386/i386-passes.def index 2d29f65da88..99fc8805b22 100644 --- a/gcc/config/i386/i386-passes.def +++ b/gcc/config/i386/i386-passes.def @@ -33,3 +33,4 @@ along with GCC; see the file COPYING3. If not see INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_endbr_and_patchable_area); INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_partial_avx_dependency); + INSERT_PASS_AFTER (pass_rtl_ifcvt, 1, pass_apx_nf_convert); diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 68f57393c5d..a3629b32a01 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -423,6 +423,7 @@ extern rtl_opt_pass *make_pass_insert_endbr_and_patchable_area (gcc::context *); extern rtl_opt_pass *make_pass_remove_partial_avx_dependency (gcc::context *); +extern rtl_opt_pass *make_pass_apx_nf_convert (gcc::context *); extern bool ix86_has_no_direct_extern_access; extern bool ix86_rpad_gate (); diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 214cb2e239a..0d4ee514e40 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -992,6 +992,9 @@ (define_attr "enabled" "" (define_attr "preferred_for_size" "" (const_int 1)) (define_attr "preferred_for_speed" "" (const_int 1)) +;; Define attribute to mark the insn has nf variant. +(define_attr "has_nf" "0,1" (const_string "0")) + ;; Describe a user's asm statement. 
(define_asm_attributes [(set_attr "length" "128") @@ -6565,6 +6568,7 @@ (define_insn "*add<mode>_1<nf_name>" (and (eq_attr "type" "alu") (match_operand 2 "const128_operand")) (const_string "1") (const_string "*"))) + (set_attr "has_nf" "1") (set_attr "mode" "<MODE>")]) ;; It may seem that nonimmediate operand is proper one for operand 1. @@ -6682,6 +6686,7 @@ (define_insn "*addhi_1<nf_name>" (and (eq_attr "type" "alu") (match_operand 2 "const128_operand")) (const_string "1") (const_string "*"))) + (set_attr "has_nf" "1") (set_attr "mode" "HI,HI,HI,SI,HI,HI")]) (define_insn "*addqi_1<nf_name>" @@ -6750,6 +6755,7 @@ (define_insn "*addqi_1<nf_name>" (and (eq_attr "type" "alu") (match_operand 2 "const128_operand")) (const_string "1") (const_string "*"))) + (set_attr "has_nf" "1") (set_attr "mode" "QI,QI,QI,SI,SI,SI,QI,QI") ;; Potential partial reg stall on alternatives 3 and 4. (set (attr "preferred_for_speed") @@ -7963,6 +7969,7 @@ (define_insn "*sub<mode>_1<nf_name>" <nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd") (set_attr "type" "alu") + (set_attr "has_nf" "1") (set_attr "mode" "<MODE>")]) (define_insn "*subsi_1_zext" @@ -10078,6 +10085,7 @@ (define_insn "*mul<mode>3_1<nf_name>" (match_test "<MODE>mode == HImode") (const_string "double") (const_string "direct"))) + (set_attr "has_nf" "1") (set_attr "mode" "<MODE>")]) (define_insn "*imulhi<mode>zu<nf_name>" @@ -10145,6 +10153,7 @@ (define_insn "*mulqi3_1<nf_name>" (const_string "direct"))) (set_attr "amdfam10_decode" "direct") (set_attr "bdver1_decode" "direct") + (set_attr "has_nf" "1") (set_attr "mode" "QI")]) ;; Multiply with jump on overflow. 
@@ -11309,6 +11318,7 @@ (define_insn "*<u>divmod<mode>4_noext" "" "<sgnprefix>div{<imodesuffix>}\t%3" [(set_attr "type" "idiv") + (set_attr "has_nf" "1") (set_attr "mode" "<MODE>")]) (define_insn "*<u>divmodsi4_noext_zext_1" @@ -11461,6 +11471,7 @@ (define_insn "<u>divmodhiqi3<nf_name>" && <nf_condition>" "<nf_prefix><sgnprefix>div{b}\t%2" [(set_attr "type" "idiv") + (set_attr "has_nf" "1") (set_attr "mode" "QI")]) ;; We cannot use div/idiv for double division, because it causes @@ -12006,6 +12017,7 @@ (define_insn "*anddi_1<nf_name>" (match_operand 1 "ext_QIreg_operand"))) (const_string "1") (const_string "*"))) + (set_attr "has_nf" "1") (set_attr "mode" "SI,SI,DI,DI,DI,DI,DI,DI,SI,DI")]) (define_insn_and_split "*anddi_1_btr" @@ -12112,6 +12124,7 @@ (define_insn "*and<mode>_1<nf_name>" (match_operand 1 "ext_QIreg_operand"))) (const_string "1") (const_string "*"))) + (set_attr "has_nf" "1") (set_attr "mode" "<MODE>,<MODE>,<MODE>,<MODE>,<MODE>,<MODE>,SI,<MODE>")]) (define_insn "*andqi_1<nf_name>" @@ -12129,6 +12142,7 @@ (define_insn "*andqi_1<nf_name>" #" [(set_attr "type" "alu,alu,alu,alu,alu,msklog") (set_attr "isa" "*,*,*,apx_ndd,apx_ndd,*") + (set_attr "has_nf" "1") (set (attr "mode") (cond [(eq_attr "alternative" "2") (const_string "SI") @@ -13005,6 +13019,7 @@ (define_insn "*<code><mode>_1<nf_name>" #" [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd,<kmov_isa>") (set_attr "type" "alu,alu, alu, alu, alu, alu, msklog") + (set_attr "has_nf" "1") (set_attr "mode" "<MODE>")]) (define_insn_and_split "*notxor<mode>_1" @@ -13165,6 +13180,7 @@ (define_insn "*<code>qi_1<nf_name>" #" [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,avx512f") (set_attr "type" "alu,alu,alu,alu,alu,msklog") + (set_attr "has_nf" "1") (set (attr "mode") (cond [(eq_attr "alternative" "2") (const_string "SI") @@ -13731,6 +13747,7 @@ (define_insn "*neg<mode>_1<nf_name>" <nf_prefix>neg{<imodesuffix>}\t{%1, %0|%0, %1}" [(set_attr "type" "negnot") (set_attr "isa" "*,apx_ndd") + (set_attr "has_nf" "1") 
(set_attr "mode" "<MODE>")]) (define_insn "*negsi_1_zext" @@ -14744,6 +14761,7 @@ (define_insn "x86_64_shld<nf_name>" "<nf_prefix>shld{q}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") + (set_attr "has_nf" "1") (set_attr "mode" "DI") (set_attr "athlon_decode" "vector") (set_attr "amdfam10_decode" "vector") @@ -14763,6 +14781,7 @@ (define_insn "x86_64_shld_ndd<nf_name>" "TARGET_APX_NDD && <nf_condition>" "<nf_prefix>shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") + (set_attr "has_nf" "1") (set_attr "mode" "DI")]) (define_insn "x86_64_shld_1<nf_name>" @@ -14780,6 +14799,7 @@ (define_insn "x86_64_shld_1<nf_name>" "<nf_prefix>shld{q}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") + (set_attr "has_nf" "1") (set_attr "mode" "DI") (set_attr "length_immediate" "1") (set_attr "athlon_decode" "vector") @@ -14800,6 +14820,7 @@ (define_insn "x86_64_shld_ndd_1<nf_name>" && <nf_condition>" "<nf_prefix>shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") + (set_attr "has_nf" "1") (set_attr "mode" "DI") (set_attr "length_immediate" "1")]) @@ -14909,7 +14930,8 @@ (define_insn_and_split "*x86_64_shld_shrd_1_nozext" emit_move_insn (operands[0], tmp); } DONE; -}) +} + [(set_attr "has_nf" "1")]) (define_insn_and_split "*x86_64_shld_2" [(set (match_operand:DI 0 "nonimmediate_operand") @@ -14974,6 +14996,7 @@ (define_insn "x86_shld<nf_name>" "<nf_prefix>shld{l}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") + (set_attr "has_nf" "1") (set_attr "mode" "SI") (set_attr "pent_pair" "np") (set_attr "athlon_decode" "vector") @@ -14994,6 +15017,7 @@ (define_insn "x86_shld_ndd<nf_name>" "TARGET_APX_NDD && <nf_condition>" "<nf_prefix>shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") + (set_attr "has_nf" "1") (set_attr "mode" "SI")]) @@ -15012,6 +15036,7 @@ (define_insn "x86_shld_1<nf_name>" [(set_attr "type" "ishift") (set_attr "prefix_0f" 
"1") (set_attr "length_immediate" "1") + (set_attr "has_nf" "1") (set_attr "mode" "SI") (set_attr "pent_pair" "np") (set_attr "athlon_decode" "vector") @@ -15033,6 +15058,7 @@ (define_insn "x86_shld_ndd_1<nf_name>" "<nf_prefix>shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "length_immediate" "1") + (set_attr "has_nf" "1") (set_attr "mode" "SI")]) (define_insn_and_split "*x86_shld_shrd_1_nozext_nf" @@ -15140,7 +15166,8 @@ (define_insn_and_split "*x86_shld_shrd_1_nozext" emit_move_insn (operands[0], tmp); } DONE; -}) +} + [(set_attr "has_nf" "1")]) (define_insn_and_split "*x86_shld_2" [(set (match_operand:SI 0 "nonimmediate_operand") @@ -15356,6 +15383,7 @@ (define_insn "*ashl<mode>3_1<nf_name>" (match_test "optimize_function_for_size_p (cfun)"))))) (const_string "0") (const_string "*"))) + (set_attr "has_nf" "1") (set_attr "mode" "<MODE>")]) ;; Convert shift to the shiftx pattern to avoid flags dependency. @@ -15512,6 +15540,7 @@ (define_insn "*ashlhi3_1<nf_name>" (match_test "optimize_function_for_size_p (cfun)"))))) (const_string "0") (const_string "*"))) + (set_attr "has_nf" "1") (set_attr "mode" "HI,SI,HI,HI")]) (define_insn "*ashlqi3_1<nf_name>" @@ -15583,6 +15612,7 @@ (define_insn "*ashlqi3_1<nf_name>" (match_test "optimize_function_for_size_p (cfun)"))))) (const_string "0") (const_string "*"))) + (set_attr "has_nf" "1") (set_attr "mode" "QI,SI,SI,QI,QI") ;; Potential partial reg stall on alternative 1. 
(set (attr "preferred_for_speed") @@ -16184,7 +16214,8 @@ (define_insn_and_split "<insn><dwi>3_doubleword_lowpart" operands[4] = GEN_INT ((<MODE_SIZE> * BITS_PER_UNIT) - INTVAL (operands[2])); if (!rtx_equal_p (operands[0], operands[1])) emit_move_insn (operands[0], operands[1]); -}) +} + [(set_attr "has_nf" "1")]) (define_insn "x86_64_shrd<nf_name>" [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") @@ -16201,6 +16232,7 @@ (define_insn "x86_64_shrd<nf_name>" "<nf_prefix>shrd{q}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") + (set_attr "has_nf" "1") (set_attr "mode" "DI") (set_attr "athlon_decode" "vector") (set_attr "amdfam10_decode" "vector") @@ -16220,6 +16252,7 @@ (define_insn "x86_64_shrd_ndd<nf_name>" "TARGET_APX_NDD && <nf_condition>" "<nf_prefix>shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") + (set_attr "has_nf" "1") (set_attr "mode" "DI")]) (define_insn "x86_64_shrd_1<nf_name>" @@ -16238,6 +16271,7 @@ (define_insn "x86_64_shrd_1<nf_name>" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "length_immediate" "1") + (set_attr "has_nf" "1") (set_attr "mode" "DI") (set_attr "athlon_decode" "vector") (set_attr "amdfam10_decode" "vector") @@ -16258,6 +16292,7 @@ (define_insn "x86_64_shrd_ndd_1<nf_name>" "<nf_prefix>shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "length_immediate" "1") + (set_attr "has_nf" "1") (set_attr "mode" "DI")]) (define_insn_and_split "*x86_64_shrd_shld_1_nozext_nf" @@ -16366,7 +16401,8 @@ (define_insn_and_split "*x86_64_shrd_shld_1_nozext" emit_move_insn (operands[0], tmp); } DONE; -}) +} + [(set_attr "has_nf" "1")]) (define_insn_and_split "*x86_64_shrd_2" [(set (match_operand:DI 0 "nonimmediate_operand") @@ -16431,6 +16467,7 @@ (define_insn "x86_shrd<nf_name>" "<nf_prefix>shrd{l}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") + (set_attr "has_nf" "1") (set_attr "mode" "SI") (set_attr "pent_pair" 
"np") (set_attr "athlon_decode" "vector") @@ -16451,6 +16488,7 @@ (define_insn "x86_shrd_ndd<nf_name>" "TARGET_APX_NDD && <nf_condition>" "<nf_prefix>shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") + (set_attr "has_nf" "1") (set_attr "mode" "SI")]) (define_insn "x86_shrd_1<nf_name>" @@ -16468,6 +16506,7 @@ (define_insn "x86_shrd_1<nf_name>" [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "length_immediate" "1") + (set_attr "has_nf" "1") (set_attr "mode" "SI") (set_attr "pent_pair" "np") (set_attr "athlon_decode" "vector") @@ -16489,6 +16528,7 @@ (define_insn "x86_shrd_ndd_1<nf_name>" "<nf_prefix>shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ishift") (set_attr "length_immediate" "1") + (set_attr "has_nf" "1") (set_attr "mode" "SI")]) (define_insn_and_split "*x86_shrd_shld_1_nozext_nf" @@ -16596,7 +16636,8 @@ (define_insn_and_split "*x86_shrd_shld_1_nozext" emit_move_insn (operands[0], tmp); } DONE; -}) +} + [(set_attr "has_nf" "1")]) (define_insn_and_split "*x86_shrd_2" [(set (match_operand:SI 0 "nonimmediate_operand") @@ -16668,6 +16709,7 @@ (define_insn "ashr<mode>3_cvt<nf_name>" (set_attr "prefix_0f" "0,*,*") (set_attr "length_immediate" "0,*,*") (set_attr "modrm" "0,1,1") + (set_attr "has_nf" "1") (set_attr "mode" "<MODE>")]) (define_insn "*ashrsi3_cvt_zext" @@ -16761,6 +16803,7 @@ (define_insn "*ashr<mode>3_1<nf_name>" (match_test "optimize_function_for_size_p (cfun)"))) (const_string "0") (const_string "*"))) + (set_attr "has_nf" "1") (set_attr "mode" "<MODE>")]) ;; Specialization of *lshr<mode>3_1 below, extracting the SImode @@ -16824,6 +16867,7 @@ (define_insn "*lshr<mode>3_1<nf_name>" (match_test "optimize_function_for_size_p (cfun)"))) (const_string "0") (const_string "*"))) + (set_attr "has_nf" "1") (set_attr "mode" "<MODE>")]) ;; Convert shift to the shiftx pattern to avoid flags dependency. 
@@ -16939,6 +16983,7 @@ (define_insn "*ashr<mode>3_1<nf_name>" (match_test "optimize_function_for_size_p (cfun)"))) (const_string "0") (const_string "*"))) + (set_attr "has_nf" "1") (set_attr "mode" "<MODE>")]) (define_insn "*lshrqi3_1<nf_name>" @@ -16976,6 +17021,7 @@ (define_insn "*lshrqi3_1<nf_name>" (match_test "optimize_function_for_size_p (cfun)"))) (const_string "0") (const_string "*"))) + (set_attr "has_nf" "1") (set_attr "mode" "QI")]) (define_insn "*lshrhi3_1<nf_name>" @@ -17013,6 +17059,7 @@ (define_insn "*lshrhi3_1<nf_name>" (match_test "optimize_function_for_size_p (cfun)"))) (const_string "0") (const_string "*"))) + (set_attr "has_nf" "1") (set_attr "mode" "HI")]) ;; Alternative 1 is needed to work around LRA limitation, see PR82524. @@ -17562,6 +17609,7 @@ (define_insn "*<insn><mode>3_1<nf_name>" (match_test "optimize_function_for_size_p (cfun)")))) (const_string "0") (const_string "*"))) + (set_attr "has_nf" "1") (set_attr "mode" "<MODE>")]) ;; Convert rotate to the rotatex pattern to avoid flags dependency. @@ -17706,6 +17754,7 @@ (define_insn "*<insn><mode>3_1<nf_name>" (match_test "optimize_function_for_size_p (cfun)"))) (const_string "0") (const_string "*"))) + (set_attr "has_nf" "1") (set_attr "mode" "<MODE>")]) ;; Alternative 1 is needed to work around LRA limitation, see PR82524. 
@@ -20497,6 +20546,7 @@ (define_insn_and_split "clz<mode>2_lzcnt" "ix86_expand_clear (operands[0]);" [(set_attr "prefix_rep" "1") (set_attr "type" "bitmanip") + (set_attr "has_nf" "1") (set_attr "mode" "<MODE>")]) ; False dependency happens when destination is only updated by tzcnt, @@ -20525,6 +20575,7 @@ (define_insn "*clz<mode>2_lzcnt_falsedep" "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" [(set_attr "prefix_rep" "1") (set_attr "type" "bitmanip") + (set_attr "has_nf" "1") (set_attr "mode" "<MODE>")]) (define_insn_and_split "*clzsi2_lzcnt_zext" @@ -20658,6 +20709,7 @@ (define_insn_and_split "<lt_zcnt>_<mode>" [(set_attr "type" "<lt_zcnt_type>") (set_attr "prefix_0f" "1") (set_attr "prefix_rep" "1") + (set_attr "has_nf" "1") (set_attr "mode" "<MODE>")]) ; False dependency happens when destination is only updated by tzcnt, @@ -20688,6 +20740,7 @@ (define_insn "*<lt_zcnt>_<mode>_falsedep" [(set_attr "type" "<lt_zcnt_type>") (set_attr "prefix_0f" "1") (set_attr "prefix_rep" "1") + (set_attr "has_nf" "1") (set_attr "mode" "<MODE>")]) (define_insn "<lt_zcnt>_hi<nf_name>" @@ -20699,6 +20752,7 @@ (define_insn "<lt_zcnt>_hi<nf_name>" [(set_attr "type" "<lt_zcnt_type>") (set_attr "prefix_0f" "1") (set_attr "prefix_rep" "1") + (set_attr "has_nf" "1") (set_attr "mode" "HI")]) ;; BMI instructions. 
@@ -21161,6 +21215,7 @@ (define_insn_and_split "popcount<mode>2" "ix86_expand_clear (operands[0]);" [(set_attr "prefix_rep" "1") (set_attr "type" "bitmanip") + (set_attr "has_nf" "1") (set_attr "mode" "<MODE>")]) ; False dependency happens when destination is only updated by tzcnt, @@ -21201,6 +21256,7 @@ (define_insn "*popcount<mode>2_falsedep" } [(set_attr "prefix_rep" "1") (set_attr "type" "bitmanip") + (set_attr "has_nf" "1") (set_attr "mode" "<MODE>")]) (define_insn_and_split "*popcountsi2_zext" @@ -21355,6 +21411,7 @@ (define_insn "popcounthi2<nf_name>" } [(set_attr "prefix_rep" "1") (set_attr "type" "bitmanip") + (set_attr "has_nf" "1") (set_attr "mode" "HI")]) (define_expand "bswapdi2" diff --git a/gcc/testsuite/gcc.target/i386/apx-nf-2.c b/gcc/testsuite/gcc.target/i386/apx-nf-2.c new file mode 100644 index 00000000000..8f5b8f6b2b7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/apx-nf-2.c @@ -0,0 +1,32 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O3 -mapxf" } */ + +int foo(int a, int b, int c, int d) +{ + int sum = a; + + if (a != c) + { + c += d; + a += b; + sum += a + c; + if (b != d && sum < c || sum > d) + { + b -= d; + sum -= b; + } + } + + return sum; +} + +int foo2 (unsigned a, unsigned b, unsigned d, unsigned e, int *p) +{ + unsigned r; + int c = __builtin_mul_overflow (a, b, &r); + *p += a; + return c ? d : e; +} + +/* { dg-final { scan-assembler-not "set" } } */ +