Message ID | 20140919075558.GE50194@msticlxl57.ims.intel.com |
---|---|
State | New |
Headers | show |
On Fri, Sep 19, 2014 at 9:55 AM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote: > Here is an updated version of this patch. > > Thanks, > Ilya > -- > 2014-09-19 Ilya Enkovich <ilya.enkovich@intel.com> > > * config/i386/i386.c (ix86_option_override_internal): Do not > support x32 with MPX. > is not available. > (init_cumulative_args): Init stdarg, bnd_regno, bnds_in_bt > and force_bnd_pass. > (function_arg_advance_32): Return number of used integer > registers. > (function_arg_advance_64): Likewise. > (function_arg_advance_ms_64): Likewise. > (ix86_function_arg_advance): Handle pointer bounds. > (ix86_function_arg): Likewise. > (ix86_function_value_regno_p): Mark fisrt bounds registers as > possible function value. > (ix86_function_value_1): Handle pointer bounds type/mode > (ix86_return_in_memory): Likewise. > (ix86_print_operand): Analyse insn to decide abounf"bnd" prefix. > (ix86_expand_call): Generate returned bounds. > (ix86_bnd_prefixed_insn_p): Check if we have instrumented call > or function. > * config/i386/i386.h (ix86_args): Add bnd_regno, bnds_in_bt, > force_bnd_pass and stdarg fields. > * config/i386/i386.md (UNSPEC_BNDRET): New. > (*call_value): Add returned bounds. > (*sibcall_value): Likewise. > (*call_value_rex64_ms_sysv): Likewise. > (*call_value_pop): Likewise. > (*sibcall_value_pop): Likewise. > * config/i386/predicates.md (call_rex64_ms_sysv_operation): Adjust > to changed call patterns. 
> > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index 4a58190..da7ffa8 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -3712,6 +3712,9 @@ ix86_option_override_internal (bool main_args_p, > if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX)) > error ("Intel MPX does not support x32"); > > + if (TARGET_X32 && (ix86_isa_flags & OPTION_MASK_ISA_MPX)) > + error ("Intel MPX does not support x32"); > + > if (!strcmp (opts->x_ix86_arch_string, "generic")) > error ("generic CPU can be used only for %stune=%s %s", > prefix, suffix, sw); > @@ -6198,10 +6201,15 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ > FIXME: once typesytem is fixed, we won't need this code anymore. */ > if (i && i->local && i->can_change_signature) > fntype = TREE_TYPE (fndecl); > + cum->stdarg = fntype ? stdarg_p (fntype) : false; No need for fntype check, stdarg_p already checks its argument, please see tree.c. > cum->maybe_vaarg = (fntype > ? (!prototype_p (fntype) || stdarg_p (fntype)) > : !libname); > > + cum->bnd_regno = FIRST_BND_REG; > + cum->bnds_in_bt = 0; > + cum->force_bnd_pass = 0; > + > if (!TARGET_64BIT) > { > /* If there are variable arguments, then we won't pass anything > @@ -7136,13 +7144,17 @@ construct_container (enum machine_mode mode, enum machine_mode orig_mode, > > /* Update the data in CUM to advance over an argument of mode MODE > and data type TYPE. (TYPE is null for libcalls where that information > - may not be available.) */ > + may not be available.) > > -static void > + Return a number of integer regsiters advanced over. 
*/ > + > +static int > function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode, > const_tree type, HOST_WIDE_INT bytes, > HOST_WIDE_INT words) > { > + int res = 0; > + > switch (mode) > { > default: > @@ -7160,7 +7172,8 @@ function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode, > cum->words += words; > cum->nregs -= words; > cum->regno += words; > - > + if (cum->nregs >= 0) > + res = words; > if (cum->nregs <= 0) > { > cum->nregs = 0; > @@ -7231,9 +7244,11 @@ function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode, > } > break; > } > + > + return res; > } > > -static void > +static int > function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode, > const_tree type, HOST_WIDE_INT words, bool named) > { > @@ -7242,7 +7257,7 @@ function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode, > /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */ > if (!named && (VALID_AVX512F_REG_MODE (mode) > || VALID_AVX256_REG_MODE (mode))) > - return; > + return 0; > > if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs) > && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs) > @@ -7251,16 +7266,18 @@ function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode, > cum->sse_nregs -= sse_nregs; > cum->regno += int_nregs; > cum->sse_regno += sse_nregs; > + return int_nregs; > } > else > { > int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD; > cum->words = (cum->words + align - 1) & ~(align - 1); > cum->words += words; > + return 0; > } > } > > -static void > +static int > function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes, > HOST_WIDE_INT words) > { > @@ -7272,7 +7289,9 @@ function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes, > { > cum->nregs -= 1; > cum->regno += 1; > + return 1; > } > + return 0; > } > > /* Update the data in CUM to advance over an argument of mode MODE and > @@ -7285,6 +7304,7 @@ ix86_function_arg_advance 
(cumulative_args_t cum_v, enum machine_mode mode, > { > CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); > HOST_WIDE_INT bytes, words; > + int nregs; > > if (mode == BLKmode) > bytes = int_size_in_bytes (type); > @@ -7295,12 +7315,51 @@ ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode, > if (type) > mode = type_natural_mode (type, NULL, false); > > + if ((type && POINTER_BOUNDS_TYPE_P (type)) > + || POINTER_BOUNDS_MODE_P (mode)) > + { > + /* If we pass bounds in BT then just update remained bounds count. */ > + if (cum->bnds_in_bt) > + { > + cum->bnds_in_bt--; > + return; > + } > + > + /* Update remained number of bounds to force. */ > + if (cum->force_bnd_pass) > + cum->force_bnd_pass--; > + > + cum->bnd_regno++; > + > + return; > + } > + > + /* The first arg not going to Bounds Tables resets this counter. */ > + cum->bnds_in_bt = 0; > + /* For unnamed args we always pass bounds to avoid bounds mess when > + passed and received types do not match. If bounds do not follow > + unnamed arg, still pretend required number of bounds were passed. */ > + if (cum->force_bnd_pass) > + { > + cum->bnd_regno += cum->force_bnd_pass; > + cum->force_bnd_pass = 0; > + } > + > if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI) > - function_arg_advance_ms_64 (cum, bytes, words); > + nregs = function_arg_advance_ms_64 (cum, bytes, words); > else if (TARGET_64BIT) > - function_arg_advance_64 (cum, mode, type, words, named); > + nregs = function_arg_advance_64 (cum, mode, type, words, named); > else > - function_arg_advance_32 (cum, mode, type, bytes, words); > + nregs = function_arg_advance_32 (cum, mode, type, bytes, words); > + > + /* For stdarg we expect bounds to be passed for each value passed > + in register. */ > + if (cum->stdarg) > + cum->force_bnd_pass = nregs; > + /* For pointers passed in memory we expect bounds passed in Bounds > + Table. 
*/ > + if (!nregs) > + cum->bnds_in_bt = chkp_type_bounds_count (type); > } > > /* Define where to put the arguments to a function. > @@ -7541,17 +7600,31 @@ ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode, > bytes = GET_MODE_SIZE (mode); > words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; > > - /* To simplify the code below, represent vector types with a vector mode > - even if MMX/SSE are not active. */ > - if (type && TREE_CODE (type) == VECTOR_TYPE) > - mode = type_natural_mode (type, cum, false); > > - if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI) > - arg = function_arg_ms_64 (cum, mode, omode, named, bytes); > - else if (TARGET_64BIT) > - arg = function_arg_64 (cum, mode, omode, type, named); > + if ((type && POINTER_BOUNDS_TYPE_P (type)) > + || POINTER_BOUNDS_MODE_P (mode)) > + { > + if (cum->bnds_in_bt) > + arg = NULL; > + else if (cum->bnd_regno <= LAST_BND_REG) > + arg = gen_rtx_REG (BNDmode, cum->bnd_regno); > + else > + arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1); Please put early "return arg;" here, sou you don't have to reindend existing code. Looking at surrounding code, I don't think cum is always non-null. Also, put the whole block at the beginning of the function. > + } > else > - arg = function_arg_32 (cum, mode, omode, type, bytes, words); > + { > + /* To simplify the code below, represent vector types with a vector mode > + even if MMX/SSE are not active. */ > + if (type && TREE_CODE (type) == VECTOR_TYPE) > + mode = type_natural_mode (type, cum, false); > + > + if (TARGET_64BIT && (cum ? 
cum->call_abi : ix86_abi) == MS_ABI) > + arg = function_arg_ms_64 (cum, mode, omode, named, bytes); > + else if (TARGET_64BIT) > + arg = function_arg_64 (cum, mode, omode, type, named); > + else > + arg = function_arg_32 (cum, mode, omode, type, bytes, words); > + } > > return arg; > } > @@ -7808,6 +7881,9 @@ ix86_function_value_regno_p (const unsigned int regno) > case SI_REG: > return TARGET_64BIT && ix86_abi != MS_ABI; > > + case FIRST_BND_REG: > + return chkp_function_instrumented_p (current_function_decl); > + > /* Complex values are returned in %st(0)/%st(1) pair. */ > case ST0_REG: > case ST1_REG: > @@ -7984,7 +8060,10 @@ ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl, > fn = fntype_or_decl; > fntype = fn ? TREE_TYPE (fn) : fntype_or_decl; > > - if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI) > + if ((valtype && POINTER_BOUNDS_TYPE_P (valtype)) > + || POINTER_BOUNDS_MODE_P (mode)) > + return gen_rtx_REG (BNDmode, FIRST_BND_REG); > + else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI) > return function_value_ms_64 (orig_mode, mode, valtype); > else if (TARGET_64BIT) > return function_value_64 (orig_mode, mode, valtype); > @@ -8081,6 +8160,9 @@ ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) > const enum machine_mode mode = type_natural_mode (type, NULL, true); > HOST_WIDE_INT size; > > + if (POINTER_BOUNDS_TYPE_P (type)) > + return false; > + > if (TARGET_64BIT) > { > if (ix86_function_type_abi (fntype) == MS_ABI) > @@ -15404,7 +15486,7 @@ ix86_print_operand (FILE *file, rtx x, int code) > return; > > case '!': > - if (ix86_bnd_prefixed_insn_p (NULL_RTX)) > + if (ix86_bnd_prefixed_insn_p (current_output_insn)) > fputs ("bnd ", file); > return; > > @@ -25000,10 +25082,32 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, > } > > call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); > + > if (retval) > - call = gen_rtx_SET (VOIDmode, retval, call); > + { > + /* For instrumented 
code we may have GPR + BR in parallel but > + it will confuse DF and we need to put each reg > + under EXPR_LIST. */ > + if (chkp_function_instrumented_p (current_function_decl)) > + chkp_put_regs_to_expr_list (retval); I assume that this is OK from infrastructure POV. I'd like to ask Jeff, if this approach is OK. > + > + call = gen_rtx_SET (VOIDmode, retval, call); > + } > vec[vec_len++] = call; > > + /* b0 and b1 registers hold bounds for returned value. */ > + if (retval) > + { > + rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG); > + rtx unspec0 = gen_rtx_UNSPEC (BND64mode, > + gen_rtvec (1, b0), UNSPEC_BNDRET); > + rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1); > + rtx unspec1 = gen_rtx_UNSPEC (BND64mode, > + gen_rtvec (1, b1), UNSPEC_BNDRET); > + vec[vec_len++] = gen_rtx_SET (BND64mode, b0, unspec0); > + vec[vec_len++] = gen_rtx_SET (BND64mode, b1, unspec1); > + } > + > if (pop) > { > pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop); > @@ -46053,9 +46157,18 @@ ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2) > bnd by default for current function. */ > > bool > -ix86_bnd_prefixed_insn_p (rtx insn ATTRIBUTE_UNUSED) > +ix86_bnd_prefixed_insn_p (rtx insn) > { > - return false; > + /* For call insns check special flag. */ > + if (insn && CALL_P (insn)) > + { > + rtx call = get_call_rtx_from (insn); > + if (call) > + return CALL_EXPR_WITH_BOUNDS_P (call); > + } > + > + /* All other insns are prefixed only if function is instrumented. */ > + return chkp_function_instrumented_p (current_function_decl); > } > > /* Calculate integer abs() using only SSE2 instructions. */ > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > index a38c5d1..8f77343 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -1655,6 +1655,10 @@ typedef struct ix86_args { > int float_in_sse; /* Set to 1 or 2 for 32bit targets if > SFmode/DFmode arguments should be passed > in SSE registers. Otherwise 0. 
*/ > + int bnd_regno; /* next available bnd register number */ > + int bnds_in_bt; /* number of bounds expected in BT. */ > + int force_bnd_pass; /* number of bounds expected for stdarg arg. */ > + int stdarg; /* Set to 1 if function is stdarg. */ > enum calling_abi call_abi; /* Set to SYSV_ABI for sysv abi. Otherwise > MS_ABI for ms abi. */ > } CUMULATIVE_ARGS; > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index db22b06..0f3d555 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -194,6 +194,7 @@ > UNSPEC_BNDCU > UNSPEC_BNDCN > UNSPEC_MPX_FENCE > + UNSPEC_BNDRET > ]) > > (define_c_enum "unspecv" [ > @@ -11549,7 +11550,9 @@ > (define_insn "*call_value" > [(set (match_operand 0) > (call (mem:QI (match_operand:W 1 "call_insn_operand" "<c>BwBz")) > - (match_operand 2)))] > + (match_operand 2))) > + (set (reg:BND64 BND0_REG) (unspec [(reg:BND64 BND0_REG)] UNSPEC_BNDRET)) > + (set (reg:BND64 BND1_REG) (unspec [(reg:BND64 BND1_REG)] UNSPEC_BNDRET))] Does these patterns depend on BND0/BND1 values? If not, please use (const_int 0) for their arguments. OTOH, do you really need these sets here? Looking at the gcc internals documentation (slightly unclear in this area), it should be enough to list return values in EXPR_LIST. This question is somehow connected to the new comment in ix86_expand_call, so perhaps Jeff can comment on this approach. 
> "!SIBLING_CALL_P (insn)" > "* return ix86_output_call_insn (insn, operands[1]);" > [(set_attr "type" "callv")]) > @@ -11557,7 +11560,9 @@ > (define_insn "*sibcall_value" > [(set (match_operand 0) > (call (mem:QI (match_operand:W 1 "sibcall_insn_operand" "UBsBz")) > - (match_operand 2)))] > + (match_operand 2))) > + (set (reg:BND64 BND0_REG) (unspec [(reg:BND64 BND0_REG)] UNSPEC_BNDRET)) > + (set (reg:BND64 BND1_REG) (unspec [(reg:BND64 BND1_REG)] UNSPEC_BNDRET))] > "SIBLING_CALL_P (insn)" > "* return ix86_output_call_insn (insn, operands[1]);" > [(set_attr "type" "callv")]) > @@ -11604,6 +11609,8 @@ > [(set (match_operand 0) > (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rBwBz")) > (match_operand 2))) > + (set (reg:BND64 BND0_REG) (unspec [(reg:BND64 BND0_REG)] UNSPEC_BNDRET)) > + (set (reg:BND64 BND1_REG) (unspec [(reg:BND64 BND1_REG)] UNSPEC_BNDRET)) > (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)])] > "TARGET_64BIT && !SIBLING_CALL_P (insn)" > "* return ix86_output_call_insn (insn, operands[1]);" > @@ -11627,6 +11634,8 @@ > [(set (match_operand 0) > (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lmBz")) > (match_operand 2))) > + (set (reg:BND64 BND0_REG) (unspec [(reg:BND64 BND0_REG)] UNSPEC_BNDRET)) > + (set (reg:BND64 BND1_REG) (unspec [(reg:BND64 BND1_REG)] UNSPEC_BNDRET)) > (set (reg:SI SP_REG) > (plus:SI (reg:SI SP_REG) > (match_operand:SI 3 "immediate_operand" "i")))] > @@ -11638,6 +11647,8 @@ > [(set (match_operand 0) > (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "UBsBz")) > (match_operand 2))) > + (set (reg:BND64 BND0_REG) (unspec [(reg:BND64 BND0_REG)] UNSPEC_BNDRET)) > + (set (reg:BND64 BND1_REG) (unspec [(reg:BND64 BND1_REG)] UNSPEC_BNDRET)) > (set (reg:SI SP_REG) > (plus:SI (reg:SI SP_REG) > (match_operand:SI 3 "immediate_operand" "i")))] > diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md > index da01c9a..9991bb2 100644 > --- a/gcc/config/i386/predicates.md > +++ 
b/gcc/config/i386/predicates.md > @@ -631,14 +631,17 @@ > (match_code "parallel") > { > unsigned creg_size = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers); > + unsigned adop = GET_CODE (XVECEXP (op, 0, 0)) == SET > + ? 4 > + : 2; > unsigned i; > > - if ((unsigned) XVECLEN (op, 0) != creg_size + 2) > + if ((unsigned) XVECLEN (op, 0) != creg_size + adop) > return false; > > for (i = 0; i < creg_size; i++) > { > - rtx elt = XVECEXP (op, 0, i+2); > + rtx elt = XVECEXP (op, 0, i+adop); > enum machine_mode mode; > unsigned regno; Uros.
On Sun, Sep 21, 2014 at 2:34 PM, Uros Bizjak <ubizjak@gmail.com> wrote: > On Fri, Sep 19, 2014 at 9:55 AM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote: > >> Here is an updated version of this patch. >> >> Thanks, >> Ilya >> -- >> 2014-09-19 Ilya Enkovich <ilya.enkovich@intel.com> >> >> * config/i386/i386.c (ix86_option_override_internal): Do not >> support x32 with MPX. >> is not available. >> (init_cumulative_args): Init stdarg, bnd_regno, bnds_in_bt >> and force_bnd_pass. >> (function_arg_advance_32): Return number of used integer >> registers. >> (function_arg_advance_64): Likewise. >> (function_arg_advance_ms_64): Likewise. >> (ix86_function_arg_advance): Handle pointer bounds. >> (ix86_function_arg): Likewise. >> (ix86_function_value_regno_p): Mark fisrt bounds registers as >> possible function value. >> (ix86_function_value_1): Handle pointer bounds type/mode >> (ix86_return_in_memory): Likewise. >> (ix86_print_operand): Analyse insn to decide abounf"bnd" prefix. >> (ix86_expand_call): Generate returned bounds. >> (ix86_bnd_prefixed_insn_p): Check if we have instrumented call >> or function. >> * config/i386/i386.h (ix86_args): Add bnd_regno, bnds_in_bt, >> force_bnd_pass and stdarg fields. >> * config/i386/i386.md (UNSPEC_BNDRET): New. >> (*call_value): Add returned bounds. >> (*sibcall_value): Likewise. >> (*call_value_rex64_ms_sysv): Likewise. >> (*call_value_pop): Likewise. >> (*sibcall_value_pop): Likewise. >> * config/i386/predicates.md (call_rex64_ms_sysv_operation): Adjust >> to changed call patterns. 
>> >> >> @@ -11604,6 +11609,8 @@ >> [(set (match_operand 0) >> (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rBwBz")) >> (match_operand 2))) >> + (set (reg:BND64 BND0_REG) (unspec [(reg:BND64 BND0_REG)] UNSPEC_BNDRET)) >> + (set (reg:BND64 BND1_REG) (unspec [(reg:BND64 BND1_REG)] UNSPEC_BNDRET)) >> (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)])] >> "TARGET_64BIT && !SIBLING_CALL_P (insn)" >> "* return ix86_output_call_insn (insn, operands[1]);" This pattern has been removed [1] ... >> --- a/gcc/config/i386/predicates.md >> +++ b/gcc/config/i386/predicates.md >> @@ -631,14 +631,17 @@ >> (match_code "parallel") >> { >> unsigned creg_size = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers); >> + unsigned adop = GET_CODE (XVECEXP (op, 0, 0)) == SET >> + ? 4 >> + : 2; >> unsigned i; >> >> - if ((unsigned) XVECLEN (op, 0) != creg_size + 2) >> + if ((unsigned) XVECLEN (op, 0) != creg_size + adop) >> return false; >> >> for (i = 0; i < creg_size; i++) >> { >> - rtx elt = XVECEXP (op, 0, i+2); >> + rtx elt = XVECEXP (op, 0, i+adop); >> enum machine_mode mode; >> unsigned regno; ... and the part above has been killed as well. [1] https://gcc.gnu.org/ml/gcc-patches/2014-09/msg01739.html Uros.
On 09/21/14 06:34, Uros Bizjak wrote: > On Fri, Sep 19, 2014 at 9:55 AM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote: > >> Here is an updated version of this patch. >> @@ -25000,10 +25082,32 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, >> } >> >> call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); >> + >> if (retval) >> - call = gen_rtx_SET (VOIDmode, retval, call); >> + { >> + /* For instrumented code we may have GPR + BR in parallel but >> + it will confuse DF and we need to put each reg >> + under EXPR_LIST. */ >> + if (chkp_function_instrumented_p (current_function_decl)) >> + chkp_put_regs_to_expr_list (retval); > > I assume that this is OK from infrastructure POV. I'd like to ask > Jeff, if this approach is OK. This seems wrong. The documented way to handle multiple return values is to put them into a PARALLEL. If that's not working, I'd like Ilya to describe more precisely why. I missed this from patch #6. >> >> (define_c_enum "unspecv" [ >> @@ -11549,7 +11550,9 @@ >> (define_insn "*call_value" >> [(set (match_operand 0) >> (call (mem:QI (match_operand:W 1 "call_insn_operand" "<c>BwBz")) >> - (match_operand 2)))] >> + (match_operand 2))) >> + (set (reg:BND64 BND0_REG) (unspec [(reg:BND64 BND0_REG)] UNSPEC_BNDRET)) >> + (set (reg:BND64 BND1_REG) (unspec [(reg:BND64 BND1_REG)] UNSPEC_BNDRET))] > > Does these patterns depend on BND0/BND1 values? If not, please use > (const_int 0) for their arguments. > > OTOH, do you really need these sets here? Looking at the gcc internals > documentation (slightly unclear in this area), it should be enough to > list return values in EXPR_LIST. This question is somehow connected to > the new comment in ix86_expand_call, so perhaps Jeff can comment on > this approach. Excellent question. In fact, I'd hazard a guess this doesn't really do what Ilya wants. As written those additional sets occur in parallel with the call. ie, all the RHSs are evaluated in parallel, then the assignments to the LHSs occur in parallel. 
I suspect that's not what Ilya wants -- instead I think he wants those side effects to occur after the call. This kind of scheme also doesn't tend to "play well" with exception handling & scheduling because you can't guarantee the sets and the call are in the same block and scheduled as a single group. See the block comment in the "call" expander on the PA port. The situation is not precisely the same, but it's a bit of a warning flag if we have the call expanders doing "other" assignments. jeff
2014-09-22 22:22 GMT+04:00 Jeff Law <law@redhat.com>: > On 09/21/14 06:34, Uros Bizjak wrote: >> >> On Fri, Sep 19, 2014 at 9:55 AM, Ilya Enkovich <enkovich.gnu@gmail.com> >> wrote: >> >>> Here is an updated version of this patch. >>> @@ -25000,10 +25082,32 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx >>> callarg1, >>> } >>> >>> call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); >>> + >>> if (retval) >>> - call = gen_rtx_SET (VOIDmode, retval, call); >>> + { >>> + /* For instrumented code we may have GPR + BR in parallel but >>> + it will confuse DF and we need to put each reg >>> + under EXPR_LIST. */ >>> + if (chkp_function_instrumented_p (current_function_decl)) >>> + chkp_put_regs_to_expr_list (retval); >> >> >> I assume that this is OK from infrastructure POV. I'd like to ask >> Jeff, if this approach is OK. > > This seems wrong. The documented way to handle multiple return values is to > put them into a PARALLEL. If that's not working, I'd like Ilya to describe > more precisely why. I missed this from patch #6. Hi, I did this change a couple of years ago and don't remember exactly what problem was caused by PARALLEL. But from my comment it seems parallel lead to values in BND0 and BND1 not to be actually defined by call from DF point of view. I'll try to reproduce a problem I had. > > >>> >>> (define_c_enum "unspecv" [ >>> @@ -11549,7 +11550,9 @@ >>> (define_insn "*call_value" >>> [(set (match_operand 0) >>> (call (mem:QI (match_operand:W 1 "call_insn_operand" "<c>BwBz")) >>> - (match_operand 2)))] >>> + (match_operand 2))) >>> + (set (reg:BND64 BND0_REG) (unspec [(reg:BND64 BND0_REG)] >>> UNSPEC_BNDRET)) >>> + (set (reg:BND64 BND1_REG) (unspec [(reg:BND64 BND1_REG)] >>> UNSPEC_BNDRET))] >> >> >> Does these patterns depend on BND0/BND1 values? If not, please use >> (const_int 0) for their arguments. >> >> OTOH, do you really need these sets here? 
Looking at the gcc internals >> documentation (slightly unclear in this area), it should be enough to >> list return values in EXPR_LIST. This question is somehow connected to >> the new comment in ix86_expand_call, so perhaps Jeff can comment on >> this approach. > > Excellent question. In fact, I'd hazard a guess this doesn't really do what > Ilya wants. As written those additional sets occur in parallel with the > call. ie, all the RHSs are evaluated in parallel, then the assignments to > the LHSs occur in parallel. I suspect that's not what Ilya wants -- instead > I think he wants those side effects to occur after the call. > > This kind of scheme also doesn't tend to "play well" with exception handling > & scheduling becuase you can't guarantee the sets and the call are in the > same block and scheduler as a single group. How can the sets and the call not be in the same block/group if all of them are parts of a single instruction? Ilya > > See the block comment in the "call" expander on the PA port. The situation > is not precisely the same, but it's a bit of a warning flag if we have the > call expanders doing "other" assignments. > > jeff > >
On 09/23/14 00:31, Ilya Enkovich wrote: > > I did this change a couple of years ago and don't remember exactly > what problem was caused by PARALLEL. But from my comment it seems > parallel lead to values in BND0 and BND1 not to be actually defined by > call from DF point of view. I'll try to reproduce a problem I had. Please do. That would indicate a bug in the DF infrastructure. I'm not real familiar with the DF implementation, but a quick glance at df_def_record_1 seems to indicate it's got support for a set destination being a PARALLEL. >> This kind of scheme also doesn't tend to "play well" with exception handling >> & scheduling becuase you can't guarantee the sets and the call are in the >> same block and scheduler as a single group. > > How can the sets and the call no be in the same block/group if all of > them are parts of a single instruction? Obviously in the cases where we've had these problems in the past they were distinct instructions. So EH interactions isn't going to be an issue for MPX. However, we've still got the problem that the RTL you've generated is ill-formed. If I understand things correctly, the assignments are the result of the call, that should be modeled by having the destination be a PARALLEL as mentioned earlier. Jeff
2014-09-23 22:01 GMT+04:00 Jeff Law <law@redhat.com>: > On 09/23/14 00:31, Ilya Enkovich wrote: >> >> >> I did this change a couple of years ago and don't remember exactly >> what problem was caused by PARALLEL. But from my comment it seems >> parallel lead to values in BND0 and BND1 not to be actually defined by >> call from DF point of view. I'll try to reproduce a problem I had. > > Please do. That would indicate a bug in the DF infrastructure. I'm not > real familiar with the DF implementation, but a quick glance at > df_def_record_1 seems to indicate it's got support for a set destination > being a PARALLEL. > >>> This kind of scheme also doesn't tend to "play well" with exception >>> handling >>> & scheduling becuase you can't guarantee the sets and the call are in the >>> same block and scheduler as a single group. >> >> >> How can the sets and the call no be in the same block/group if all of >> them are parts of a single instruction? > > Obviously in the cases where we've had these problems in the past they were > distinct instructions. So EH interactions isn't going to be an issue for > MPX. > > However, we've still got the problem that the RTL you've generated is > ill-formed. If I understand things correctly, the assignments are the > result of the call, that should be modeled by having the destination be a > PARALLEL as mentioned earlier. OK. Will try it. BTW call_value_pop patterns have two sets. One for returned value and one for stack register. How comes it differs much from what I do with bound regs? Thanks, Ilya > > > > Jeff
2014-09-24 11:05 GMT+04:00 Ilya Enkovich <enkovich.gnu@gmail.com>: > 2014-09-23 22:01 GMT+04:00 Jeff Law <law@redhat.com>: >> On 09/23/14 00:31, Ilya Enkovich wrote: >>> >>> >>> I did this change a couple of years ago and don't remember exactly >>> what problem was caused by PARALLEL. But from my comment it seems >>> parallel lead to values in BND0 and BND1 not to be actually defined by >>> call from DF point of view. I'll try to reproduce a problem I had. >> >> Please do. That would indicate a bug in the DF infrastructure. I'm not >> real familiar with the DF implementation, but a quick glance at >> df_def_record_1 seems to indicate it's got support for a set destination >> being a PARALLEL. >> >>>> This kind of scheme also doesn't tend to "play well" with exception >>>> handling >>>> & scheduling becuase you can't guarantee the sets and the call are in the >>>> same block and scheduler as a single group. >>> >>> >>> How can the sets and the call no be in the same block/group if all of >>> them are parts of a single instruction? >> >> Obviously in the cases where we've had these problems in the past they were >> distinct instructions. So EH interactions isn't going to be an issue for >> MPX. >> >> However, we've still got the problem that the RTL you've generated is >> ill-formed. If I understand things correctly, the assignments are the >> result of the call, that should be modeled by having the destination be a >> PARALLEL as mentioned earlier. > > OK. Will try it. BTW call_value_pop patterns have two sets. One for > returned value and one for stack register. How comes it differs much > from what I do with bound regs? > > Thanks, > Ilya > >> >> >> >> Jeff I tried to generate PARALLEL with all regs set by call. 
Here is a memset call I got: (call_insn 23 22 24 2 (set (parallel [ (expr_list:REG_DEP_TRUE (reg:DI 0 ax) (const_int 0 [0])) (expr_list:REG_DEP_TRUE (reg:BND64 77 bnd0) (const_int 0 [0])) (expr_list:REG_DEP_TRUE (reg:BND64 78 bnd1) (const_int 0 [0])) ]) (call/j (mem:QI (symbol_ref:DI ("memset") [flags 0x41] <function_decl 0x7ffff7f79400 memset.chkp>) [0 __builtin_memset S1 A8]) (const_int 0 [0]))) /export/users/ienkovic/mpx/tests/own-tests/255/test-255.c:11 652 {*call_value} (expr_list:REG_RETURNED (reg/f:DI 100) (expr_list:REG_DEAD (reg:DI 5 di) (expr_list:REG_DEAD (reg:SI 4 si) (expr_list:REG_DEAD (reg:DI 1 dx) (expr_list:REG_UNUSED (reg:BND64 78 bnd1) (expr_list:REG_UNUSED (reg:BND64 77 bnd0) (expr_list:REG_UNUSED (reg:DI 0 ax) (expr_list:REG_CALL_DECL (symbol_ref:DI ("memset") [flags 0x41] <function_decl 0x7ffff7f79400 memset.chkp>) (expr_list:REG_EH_REGION (const_int 0 [0]) (nil)))))))))) (expr_list:DI (set (reg:DI 0 ax) (reg:DI 5 di)) (expr_list:DI (use (reg:DI 5 di)) (expr_list:BND64 (use (reg:BND64 77 bnd0)) (expr_list:SI (use (reg:SI 4 si)) (expr_list:DI (use (reg:DI 1 dx)) (nil))))))) During register allocation LRA generated a weird move instruction: (insn 63 0 0 (set (reg/f:DI 100) (parallel [ (expr_list:REG_DEP_TRUE (reg:DI 0 ax) (const_int 0 [0])) (expr_list:REG_DEP_TRUE (reg:BND64 77 bnd0) (const_int 0 [0])) (expr_list:REG_DEP_TRUE (reg:BND64 78 bnd1) (const_int 0 [0])) ])) -1 (nil)) Which caused ICE later in LRA. This move happens because of REG_RETURNED (reg/f:DI 100) (see condition in inherit_in_ebb at lra-constraints.c:5312). Thus this code in LRA doesn't accept PARALLEL dest for calls. So my question here is should I go through problems to enable PARALLEL call destination or current sets are OK taking into account we would still have multiple sets due to call_value_pop patterns? Thanks, Ilya
On 09/24/14 01:05, Ilya Enkovich wrote: >> However, we've still got the problem that the RTL you've generated is >> ill-formed. If I understand things correctly, the assignments are the >> result of the call, that should be modeled by having the destination be a >> PARALLEL as mentioned earlier. > > OK. Will try it. BTW call_value_pop patterns have two sets. One for > returned value and one for stack register. How comes it differs much > from what I do with bound regs? The semantics of a PARALLEL are that all the values used in the expressions are evaluated, then all the side effects are performed. So: (define_insn "*call_pop" [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lmBz")) (match_operand 1)) (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (match_operand:SI 2 "immediate_operand" "i")))] "!TARGET_64BIT && !SIBLING_CALL_P (insn)" "* return ix86_output_call_insn (insn, operands[0]);" [(set_attr "type" "call")]) According to the semantics of a PARALLEL would indicate that the reference to SP_REG on the RHS of the 2nd assignment expression takes the value of SP_REG *prior to the call*. And those are the semantics we depend on. So in your case the RHS references to BND0_REG and BND1_REG use the values *before* the call -- and I don't think that's the semantics you want. You might get away with it because of the UNSPEC wrapping, but IMHO, it's still ill-formed RTL. jeff
On 09/24/14 07:13, Ilya Enkovich wrote: > I tried to generate PARALLEL with all regs set by call. Here is a > memset call I got: > > (call_insn 23 22 24 2 (set (parallel [ > (expr_list:REG_DEP_TRUE (reg:DI 0 ax) > (const_int 0 [0])) > (expr_list:REG_DEP_TRUE (reg:BND64 77 bnd0) > (const_int 0 [0])) > (expr_list:REG_DEP_TRUE (reg:BND64 78 bnd1) > (const_int 0 [0])) > ]) > (call/j (mem:QI (symbol_ref:DI ("memset") [flags 0x41] [ snip ] Looks good. This is the approved way to handle multiple results of a call. > > During register allocation LRA generated a weird move instruction: > > (insn 63 0 0 (set (reg/f:DI 100) > (parallel [ > (expr_list:REG_DEP_TRUE (reg:DI 0 ax) > (const_int 0 [0])) > (expr_list:REG_DEP_TRUE (reg:BND64 77 bnd0) > (const_int 0 [0])) > (expr_list:REG_DEP_TRUE (reg:BND64 78 bnd1) > (const_int 0 [0])) > ])) -1 > (nil)) > > Which caused ICE later in LRA. This move happens because of > REG_RETURNED (reg/f:DI 100) (see condition in inherit_in_ebb at > lra-constraints.c:5312). Thus this code in LRA doesn't accept > PARALLEL dest for calls. This is a bug in LRA then. Multiple return values aren't heavily used, so I'm not surprised that its handling was missed in LRA. The question now is how to bundle things together in such a way as to make it easy for Vlad to reproduce and fix this in LRA. Jeff
2014-09-24 23:09 GMT+04:00 Jeff Law <law@redhat.com>: > On 09/24/14 07:13, Ilya Enkovich wrote: >> >> I tried to generate PARALLEL with all regs set by call. Here is a >> memset call I got: >> >> (call_insn 23 22 24 2 (set (parallel [ >> (expr_list:REG_DEP_TRUE (reg:DI 0 ax) >> (const_int 0 [0])) >> (expr_list:REG_DEP_TRUE (reg:BND64 77 bnd0) >> (const_int 0 [0])) >> (expr_list:REG_DEP_TRUE (reg:BND64 78 bnd1) >> (const_int 0 [0])) >> ]) >> (call/j (mem:QI (symbol_ref:DI ("memset") [flags 0x41] > > [ snip ] > Looks good. This is the approved way to handle multiple results of a call. > >> >> During register allocation LRA generated a weird move instruction: >> >> (insn 63 0 0 (set (reg/f:DI 100) >> (parallel [ >> (expr_list:REG_DEP_TRUE (reg:DI 0 ax) >> (const_int 0 [0])) >> (expr_list:REG_DEP_TRUE (reg:BND64 77 bnd0) >> (const_int 0 [0])) >> (expr_list:REG_DEP_TRUE (reg:BND64 78 bnd1) >> (const_int 0 [0])) >> ])) -1 >> (nil)) >> >> Which caused ICE later in LRA. This move happens because of >> REG_RETURNED (reg/f:DI 100) (see condition in inherit_in_ebb at >> lra-constraints.c:5312). Thus this code in LRA doesn't accept >> PARALLEL dest for calls. > > This is a bug in LRA then. Multiple return values aren't heavily used, so > I'm not surprised that its handling was missed in LRA. > > The question now is how to bundle things together in such a way as to make > it easy for Vlad to reproduce and fix this in LRA. > > Jeff I suppose it should be easy to reproduce using the same test case I use and some pseudo patch which adds fake return values (e.g. xmm6 and xmm7) to calls. Will try to make some minimal patch and test Vlad could work with. Ilya
2014-09-25 1:51 GMT+04:00 Ilya Enkovich <enkovich.gnu@gmail.com>: > 2014-09-24 23:09 GMT+04:00 Jeff Law <law@redhat.com>: >> On 09/24/14 07:13, Ilya Enkovich wrote: >>> >>> I tried to generate PARALLEL with all regs set by call. Here is a >>> memset call I got: >>> >>> (call_insn 23 22 24 2 (set (parallel [ >>> (expr_list:REG_DEP_TRUE (reg:DI 0 ax) >>> (const_int 0 [0])) >>> (expr_list:REG_DEP_TRUE (reg:BND64 77 bnd0) >>> (const_int 0 [0])) >>> (expr_list:REG_DEP_TRUE (reg:BND64 78 bnd1) >>> (const_int 0 [0])) >>> ]) >>> (call/j (mem:QI (symbol_ref:DI ("memset") [flags 0x41] >> >> [ snip ] >> Looks good. This is the approved way to handle multiple results of a call. >> >>> >>> During register allocation LRA generated a weird move instruction: >>> >>> (insn 63 0 0 (set (reg/f:DI 100) >>> (parallel [ >>> (expr_list:REG_DEP_TRUE (reg:DI 0 ax) >>> (const_int 0 [0])) >>> (expr_list:REG_DEP_TRUE (reg:BND64 77 bnd0) >>> (const_int 0 [0])) >>> (expr_list:REG_DEP_TRUE (reg:BND64 78 bnd1) >>> (const_int 0 [0])) >>> ])) -1 >>> (nil)) >>> >>> Which caused ICE later in LRA. This move happens because of >>> REG_RETURNED (reg/f:DI 100) (see condition in inherit_in_ebb at >>> lra-constraints.c:5312). Thus this code in LRA doesn't accept >>> PARALLEL dest for calls. >> >> This is a bug in LRA then. Multiple return values aren't heavily used, so >> I'm not surprised that its handling was missed in LRA. >> >> The question now is how to bundle things together in such a way as to make >> it easy for Vlad to reproduce and fix this in LRA. >> >> Jeff > > I suppose it should be easy to reproduce using the same test case I > use and some speudo patch which adds fake return values (e.g. xmm6 and > xmm7) to calls. Will try to make some minimal patch and test Vlad > could work with. > > Ilya I couldn't reproduce the problem on a small test but chrome build shows a lot of errors. 
Due to the nature of the problem test's size shouldn't matter, so I attach patch which emulates situation with bounds regs (but uses xmm5 and xmm6 instead of bnd0 and bnd1) with a preprocessed chrome file. I apply patch to revision 215580. Command to reproduce: >g++ -O2 -c generated_message_reflection.ii ../../third_party/protobuf/src/google/protobuf/generated_message_reflection.cc: In member function 'virtual void google::protobuf::internal::GeneratedMessageReflection::AddBool(google::protobuf::Message*, const google::protobuf::FieldDescriptor*, bool) const': ../../third_party/protobuf/src/google/protobuf/generated_message_reflection.cc:726:3910: internal compiler error: in lra_set_insn_recog_data, at lra.c:941 0xc7b969 lra_set_insn_recog_data(rtx_insn*) ../../gcc-pl/gcc/lra.c:939 0xc79822 lra_get_insn_recog_data ../../gcc-pl/gcc/lra-int.h:473 0xc7d426 lra_update_insn_regno_info(rtx_insn*) ../../gcc-pl/gcc/lra.c:1600 0xc7d690 lra_push_insn_1 ../../gcc-pl/gcc/lra.c:1653 0xc7d6c0 lra_push_insn(rtx_insn*) ../../gcc-pl/gcc/lra.c:1661 0xc7d7bf push_insns ../../gcc-pl/gcc/lra.c:1704 0xc7da47 lra_process_new_insns(rtx_insn*, rtx_insn*, rtx_insn*, char const*) ../../gcc-pl/gcc/lra.c:1758 0xc94c80 inherit_in_ebb ../../gcc-pl/gcc/lra-constraints.c:5356 0xc9599c lra_inheritance() ../../gcc-pl/gcc/lra-constraints.c:5560 0xc7e86c lra(_IO_FILE*) ../../gcc-pl/gcc/lra.c:2223 0xc2eab8 do_reload ../../gcc-pl/gcc/ira.c:5311 0xc2edfe execute ../../gcc-pl/gcc/ira.c:5470 Please submit a full bug report, with preprocessed source if appropriate. Please include the complete backtrace with any bug report. See <http://gcc.gnu.org/bugs.html> for instructions. The problem as I see it is that in lra-constraints.c:5352 we do not check call dest is actually a register. But probably REG_RETURNED shouldn't be applied to such call because it is not clear to which return value it applies. Thanks, Ilya
Adding Vladimir. Ilya 2014-09-25 13:46 GMT+04:00 Ilya Enkovich <enkovich.gnu@gmail.com>: > 2014-09-25 1:51 GMT+04:00 Ilya Enkovich <enkovich.gnu@gmail.com>: >> 2014-09-24 23:09 GMT+04:00 Jeff Law <law@redhat.com>: >>> On 09/24/14 07:13, Ilya Enkovich wrote: >>>> >>>> I tried to generate PARALLEL with all regs set by call. Here is a >>>> memset call I got: >>>> >>>> (call_insn 23 22 24 2 (set (parallel [ >>>> (expr_list:REG_DEP_TRUE (reg:DI 0 ax) >>>> (const_int 0 [0])) >>>> (expr_list:REG_DEP_TRUE (reg:BND64 77 bnd0) >>>> (const_int 0 [0])) >>>> (expr_list:REG_DEP_TRUE (reg:BND64 78 bnd1) >>>> (const_int 0 [0])) >>>> ]) >>>> (call/j (mem:QI (symbol_ref:DI ("memset") [flags 0x41] >>> >>> [ snip ] >>> Looks good. This is the approved way to handle multiple results of a call. >>> >>>> >>>> During register allocation LRA generated a weird move instruction: >>>> >>>> (insn 63 0 0 (set (reg/f:DI 100) >>>> (parallel [ >>>> (expr_list:REG_DEP_TRUE (reg:DI 0 ax) >>>> (const_int 0 [0])) >>>> (expr_list:REG_DEP_TRUE (reg:BND64 77 bnd0) >>>> (const_int 0 [0])) >>>> (expr_list:REG_DEP_TRUE (reg:BND64 78 bnd1) >>>> (const_int 0 [0])) >>>> ])) -1 >>>> (nil)) >>>> >>>> Which caused ICE later in LRA. This move happens because of >>>> REG_RETURNED (reg/f:DI 100) (see condition in inherit_in_ebb at >>>> lra-constraints.c:5312). Thus this code in LRA doesn't accept >>>> PARALLEL dest for calls. >>> >>> This is a bug in LRA then. Multiple return values aren't heavily used, so >>> I'm not surprised that its handling was missed in LRA. >>> >>> The question now is how to bundle things together in such a way as to make >>> it easy for Vlad to reproduce and fix this in LRA. >>> >>> Jeff >> >> I suppose it should be easy to reproduce using the same test case I >> use and some speudo patch which adds fake return values (e.g. xmm6 and >> xmm7) to calls. Will try to make some minimal patch and test Vlad >> could work with. 
>> >> Ilya > > I couldn't reproduce the problem on a small test but chrome build > shows a lot of errors. Due to the nature of the problem test's size > shouldn't matter, so I attach patch which emulates situation with > bounds regs (but uses xmm5 and xmm6 instead of bnd0 and bnd1) with a > preprocessed chrome file. > > I apply patch to revision 215580. > > Command to reproduce: > >>g++ -O2 -c generated_message_reflection.ii > ../../third_party/protobuf/src/google/protobuf/generated_message_reflection.cc: > In member function 'virtual void > google::protobuf::internal::GeneratedMessageReflection::AddBool(google::protobuf::Message*, > const google::protobuf::FieldDescriptor*, bool) const': > ../../third_party/protobuf/src/google/protobuf/generated_message_reflection.cc:726:3910: > internal compiler error: in lra_set_insn_recog_data, at lra.c:941 > 0xc7b969 lra_set_insn_recog_data(rtx_insn*) > ../../gcc-pl/gcc/lra.c:939 > 0xc79822 lra_get_insn_recog_data > ../../gcc-pl/gcc/lra-int.h:473 > 0xc7d426 lra_update_insn_regno_info(rtx_insn*) > ../../gcc-pl/gcc/lra.c:1600 > 0xc7d690 lra_push_insn_1 > ../../gcc-pl/gcc/lra.c:1653 > 0xc7d6c0 lra_push_insn(rtx_insn*) > ../../gcc-pl/gcc/lra.c:1661 > 0xc7d7bf push_insns > ../../gcc-pl/gcc/lra.c:1704 > 0xc7da47 lra_process_new_insns(rtx_insn*, rtx_insn*, rtx_insn*, char const*) > ../../gcc-pl/gcc/lra.c:1758 > 0xc94c80 inherit_in_ebb > ../../gcc-pl/gcc/lra-constraints.c:5356 > 0xc9599c lra_inheritance() > ../../gcc-pl/gcc/lra-constraints.c:5560 > 0xc7e86c lra(_IO_FILE*) > ../../gcc-pl/gcc/lra.c:2223 > 0xc2eab8 do_reload > ../../gcc-pl/gcc/ira.c:5311 > 0xc2edfe execute > ../../gcc-pl/gcc/ira.c:5470 > Please submit a full bug report, > with preprocessed source if appropriate. > Please include the complete backtrace with any bug report. > See <http://gcc.gnu.org/bugs.html> for instructions. > > The problem as I see it is that in lra-constraints.c:5352 we do not > check call dest is actually a register. 
But probably REG_RETURNED > shouldn't be applied to such a call because it is not clear to which > return value it applies. > > Thanks, > Ilya
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 4a58190..da7ffa8 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -3712,6 +3712,9 @@ ix86_option_override_internal (bool main_args_p, if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX)) error ("Intel MPX does not support x32"); + if (TARGET_X32 && (ix86_isa_flags & OPTION_MASK_ISA_MPX)) + error ("Intel MPX does not support x32"); + if (!strcmp (opts->x_ix86_arch_string, "generic")) error ("generic CPU can be used only for %stune=%s %s", prefix, suffix, sw); @@ -6198,10 +6201,15 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ FIXME: once typesytem is fixed, we won't need this code anymore. */ if (i && i->local && i->can_change_signature) fntype = TREE_TYPE (fndecl); + cum->stdarg = fntype ? stdarg_p (fntype) : false; cum->maybe_vaarg = (fntype ? (!prototype_p (fntype) || stdarg_p (fntype)) : !libname); + cum->bnd_regno = FIRST_BND_REG; + cum->bnds_in_bt = 0; + cum->force_bnd_pass = 0; + if (!TARGET_64BIT) { /* If there are variable arguments, then we won't pass anything @@ -7136,13 +7144,17 @@ construct_container (enum machine_mode mode, enum machine_mode orig_mode, /* Update the data in CUM to advance over an argument of mode MODE and data type TYPE. (TYPE is null for libcalls where that information - may not be available.) */ + may not be available.) -static void + Return a number of integer regsiters advanced over. 
*/ + +static int function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode, const_tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words) { + int res = 0; + switch (mode) { default: @@ -7160,7 +7172,8 @@ function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode, cum->words += words; cum->nregs -= words; cum->regno += words; - + if (cum->nregs >= 0) + res = words; if (cum->nregs <= 0) { cum->nregs = 0; @@ -7231,9 +7244,11 @@ function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode, } break; } + + return res; } -static void +static int function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode, const_tree type, HOST_WIDE_INT words, bool named) { @@ -7242,7 +7257,7 @@ function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode, /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */ if (!named && (VALID_AVX512F_REG_MODE (mode) || VALID_AVX256_REG_MODE (mode))) - return; + return 0; if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs) && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs) @@ -7251,16 +7266,18 @@ function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode, cum->sse_nregs -= sse_nregs; cum->regno += int_nregs; cum->sse_regno += sse_nregs; + return int_nregs; } else { int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD; cum->words = (cum->words + align - 1) & ~(align - 1); cum->words += words; + return 0; } } -static void +static int function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes, HOST_WIDE_INT words) { @@ -7272,7 +7289,9 @@ function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes, { cum->nregs -= 1; cum->regno += 1; + return 1; } + return 0; } /* Update the data in CUM to advance over an argument of mode MODE and @@ -7285,6 +7304,7 @@ ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode, { CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); HOST_WIDE_INT bytes, words; + int nregs; if 
(mode == BLKmode) bytes = int_size_in_bytes (type); @@ -7295,12 +7315,51 @@ ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode, if (type) mode = type_natural_mode (type, NULL, false); + if ((type && POINTER_BOUNDS_TYPE_P (type)) + || POINTER_BOUNDS_MODE_P (mode)) + { + /* If we pass bounds in BT then just update remained bounds count. */ + if (cum->bnds_in_bt) + { + cum->bnds_in_bt--; + return; + } + + /* Update remained number of bounds to force. */ + if (cum->force_bnd_pass) + cum->force_bnd_pass--; + + cum->bnd_regno++; + + return; + } + + /* The first arg not going to Bounds Tables resets this counter. */ + cum->bnds_in_bt = 0; + /* For unnamed args we always pass bounds to avoid bounds mess when + passed and received types do not match. If bounds do not follow + unnamed arg, still pretend required number of bounds were passed. */ + if (cum->force_bnd_pass) + { + cum->bnd_regno += cum->force_bnd_pass; + cum->force_bnd_pass = 0; + } + if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI) - function_arg_advance_ms_64 (cum, bytes, words); + nregs = function_arg_advance_ms_64 (cum, bytes, words); else if (TARGET_64BIT) - function_arg_advance_64 (cum, mode, type, words, named); + nregs = function_arg_advance_64 (cum, mode, type, words, named); else - function_arg_advance_32 (cum, mode, type, bytes, words); + nregs = function_arg_advance_32 (cum, mode, type, bytes, words); + + /* For stdarg we expect bounds to be passed for each value passed + in register. */ + if (cum->stdarg) + cum->force_bnd_pass = nregs; + /* For pointers passed in memory we expect bounds passed in Bounds + Table. */ + if (!nregs) + cum->bnds_in_bt = chkp_type_bounds_count (type); } /* Define where to put the arguments to a function. 
@@ -7541,17 +7600,31 @@ ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode, bytes = GET_MODE_SIZE (mode); words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; - /* To simplify the code below, represent vector types with a vector mode - even if MMX/SSE are not active. */ - if (type && TREE_CODE (type) == VECTOR_TYPE) - mode = type_natural_mode (type, cum, false); - if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI) - arg = function_arg_ms_64 (cum, mode, omode, named, bytes); - else if (TARGET_64BIT) - arg = function_arg_64 (cum, mode, omode, type, named); + if ((type && POINTER_BOUNDS_TYPE_P (type)) + || POINTER_BOUNDS_MODE_P (mode)) + { + if (cum->bnds_in_bt) + arg = NULL; + else if (cum->bnd_regno <= LAST_BND_REG) + arg = gen_rtx_REG (BNDmode, cum->bnd_regno); + else + arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1); + } else - arg = function_arg_32 (cum, mode, omode, type, bytes, words); + { + /* To simplify the code below, represent vector types with a vector mode + even if MMX/SSE are not active. */ + if (type && TREE_CODE (type) == VECTOR_TYPE) + mode = type_natural_mode (type, cum, false); + + if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI) + arg = function_arg_ms_64 (cum, mode, omode, named, bytes); + else if (TARGET_64BIT) + arg = function_arg_64 (cum, mode, omode, type, named); + else + arg = function_arg_32 (cum, mode, omode, type, bytes, words); + } return arg; } @@ -7808,6 +7881,9 @@ ix86_function_value_regno_p (const unsigned int regno) case SI_REG: return TARGET_64BIT && ix86_abi != MS_ABI; + case FIRST_BND_REG: + return chkp_function_instrumented_p (current_function_decl); + /* Complex values are returned in %st(0)/%st(1) pair. */ case ST0_REG: case ST1_REG: @@ -7984,7 +8060,10 @@ ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl, fn = fntype_or_decl; fntype = fn ? 
TREE_TYPE (fn) : fntype_or_decl; - if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI) + if ((valtype && POINTER_BOUNDS_TYPE_P (valtype)) + || POINTER_BOUNDS_MODE_P (mode)) + return gen_rtx_REG (BNDmode, FIRST_BND_REG); + else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI) return function_value_ms_64 (orig_mode, mode, valtype); else if (TARGET_64BIT) return function_value_64 (orig_mode, mode, valtype); @@ -8081,6 +8160,9 @@ ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) const enum machine_mode mode = type_natural_mode (type, NULL, true); HOST_WIDE_INT size; + if (POINTER_BOUNDS_TYPE_P (type)) + return false; + if (TARGET_64BIT) { if (ix86_function_type_abi (fntype) == MS_ABI) @@ -15404,7 +15486,7 @@ ix86_print_operand (FILE *file, rtx x, int code) return; case '!': - if (ix86_bnd_prefixed_insn_p (NULL_RTX)) + if (ix86_bnd_prefixed_insn_p (current_output_insn)) fputs ("bnd ", file); return; @@ -25000,10 +25082,32 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, } call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); + if (retval) - call = gen_rtx_SET (VOIDmode, retval, call); + { + /* For instrumented code we may have GPR + BR in parallel but + it will confuse DF and we need to put each reg + under EXPR_LIST. */ + if (chkp_function_instrumented_p (current_function_decl)) + chkp_put_regs_to_expr_list (retval); + + call = gen_rtx_SET (VOIDmode, retval, call); + } vec[vec_len++] = call; + /* b0 and b1 registers hold bounds for returned value. 
*/ + if (retval) + { + rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG); + rtx unspec0 = gen_rtx_UNSPEC (BND64mode, + gen_rtvec (1, b0), UNSPEC_BNDRET); + rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1); + rtx unspec1 = gen_rtx_UNSPEC (BND64mode, + gen_rtvec (1, b1), UNSPEC_BNDRET); + vec[vec_len++] = gen_rtx_SET (BND64mode, b0, unspec0); + vec[vec_len++] = gen_rtx_SET (BND64mode, b1, unspec1); + } + if (pop) { pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop); @@ -46053,9 +46157,18 @@ ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2) bnd by default for current function. */ bool -ix86_bnd_prefixed_insn_p (rtx insn ATTRIBUTE_UNUSED) +ix86_bnd_prefixed_insn_p (rtx insn) { - return false; + /* For call insns check special flag. */ + if (insn && CALL_P (insn)) + { + rtx call = get_call_rtx_from (insn); + if (call) + return CALL_EXPR_WITH_BOUNDS_P (call); + } + + /* All other insns are prefixed only if function is instrumented. */ + return chkp_function_instrumented_p (current_function_decl); } /* Calculate integer abs() using only SSE2 instructions. */ diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index a38c5d1..8f77343 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1655,6 +1655,10 @@ typedef struct ix86_args { int float_in_sse; /* Set to 1 or 2 for 32bit targets if SFmode/DFmode arguments should be passed in SSE registers. Otherwise 0. */ + int bnd_regno; /* next available bnd register number */ + int bnds_in_bt; /* number of bounds expected in BT. */ + int force_bnd_pass; /* number of bounds expected for stdarg arg. */ + int stdarg; /* Set to 1 if function is stdarg. */ enum calling_abi call_abi; /* Set to SYSV_ABI for sysv abi. Otherwise MS_ABI for ms abi. 
*/ } CUMULATIVE_ARGS; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index db22b06..0f3d555 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -194,6 +194,7 @@ UNSPEC_BNDCU UNSPEC_BNDCN UNSPEC_MPX_FENCE + UNSPEC_BNDRET ]) (define_c_enum "unspecv" [ @@ -11549,7 +11550,9 @@ (define_insn "*call_value" [(set (match_operand 0) (call (mem:QI (match_operand:W 1 "call_insn_operand" "<c>BwBz")) - (match_operand 2)))] + (match_operand 2))) + (set (reg:BND64 BND0_REG) (unspec [(reg:BND64 BND0_REG)] UNSPEC_BNDRET)) + (set (reg:BND64 BND1_REG) (unspec [(reg:BND64 BND1_REG)] UNSPEC_BNDRET))] "!SIBLING_CALL_P (insn)" "* return ix86_output_call_insn (insn, operands[1]);" [(set_attr "type" "callv")]) @@ -11557,7 +11560,9 @@ (define_insn "*sibcall_value" [(set (match_operand 0) (call (mem:QI (match_operand:W 1 "sibcall_insn_operand" "UBsBz")) - (match_operand 2)))] + (match_operand 2))) + (set (reg:BND64 BND0_REG) (unspec [(reg:BND64 BND0_REG)] UNSPEC_BNDRET)) + (set (reg:BND64 BND1_REG) (unspec [(reg:BND64 BND1_REG)] UNSPEC_BNDRET))] "SIBLING_CALL_P (insn)" "* return ix86_output_call_insn (insn, operands[1]);" [(set_attr "type" "callv")]) @@ -11604,6 +11609,8 @@ [(set (match_operand 0) (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rBwBz")) (match_operand 2))) + (set (reg:BND64 BND0_REG) (unspec [(reg:BND64 BND0_REG)] UNSPEC_BNDRET)) + (set (reg:BND64 BND1_REG) (unspec [(reg:BND64 BND1_REG)] UNSPEC_BNDRET)) (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)])] "TARGET_64BIT && !SIBLING_CALL_P (insn)" "* return ix86_output_call_insn (insn, operands[1]);" @@ -11627,6 +11634,8 @@ [(set (match_operand 0) (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lmBz")) (match_operand 2))) + (set (reg:BND64 BND0_REG) (unspec [(reg:BND64 BND0_REG)] UNSPEC_BNDRET)) + (set (reg:BND64 BND1_REG) (unspec [(reg:BND64 BND1_REG)] UNSPEC_BNDRET)) (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (match_operand:SI 3 "immediate_operand" "i")))] @@ -11638,6 +11647,8 
@@ [(set (match_operand 0) (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "UBsBz")) (match_operand 2))) + (set (reg:BND64 BND0_REG) (unspec [(reg:BND64 BND0_REG)] UNSPEC_BNDRET)) + (set (reg:BND64 BND1_REG) (unspec [(reg:BND64 BND1_REG)] UNSPEC_BNDRET)) (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (match_operand:SI 3 "immediate_operand" "i")))] diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index da01c9a..9991bb2 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -631,14 +631,17 @@ (match_code "parallel") { unsigned creg_size = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers); + unsigned adop = GET_CODE (XVECEXP (op, 0, 0)) == SET + ? 4 + : 2; unsigned i; - if ((unsigned) XVECLEN (op, 0) != creg_size + 2) + if ((unsigned) XVECLEN (op, 0) != creg_size + adop) return false; for (i = 0; i < creg_size; i++) { - rtx elt = XVECEXP (op, 0, i+2); + rtx elt = XVECEXP (op, 0, i+adop); enum machine_mode mode; unsigned regno;