[i386, Pointer Bounds Checker 33/x] MPX ABI

Message ID 20141002142536.GG28697@msticlxl57.ims.intel.com

Commit Message

Ilya Enkovich Oct. 2, 2014, 2:25 p.m. UTC
On 01 Oct 14:57, Vladimir Makarov wrote:
> On 2014-09-25 5:46 AM, Ilya Enkovich wrote:
> >2014-09-25 1:51 GMT+04:00 Ilya Enkovich <enkovich.gnu@gmail.com>:
> >>2014-09-24 23:09 GMT+04:00 Jeff Law <law@redhat.com>:
> >>>On 09/24/14 07:13, Ilya Enkovich wrote:
> >>>>
> >>>>I tried to generate PARALLEL with all regs set by call.  Here is a
> >>>>memset call I got:
> >>>>
> >>>>(call_insn 23 22 24 2 (set (parallel [
> >>>>                  (expr_list:REG_DEP_TRUE (reg:DI 0 ax)
> >>>>                      (const_int 0 [0]))
> >>>>                  (expr_list:REG_DEP_TRUE (reg:BND64 77 bnd0)
> >>>>                      (const_int 0 [0]))
> >>>>                  (expr_list:REG_DEP_TRUE (reg:BND64 78 bnd1)
> >>>>                      (const_int 0 [0]))
> >>>>              ])
> >>>>          (call/j (mem:QI (symbol_ref:DI ("memset") [flags 0x41]
> >>>
> >>>[ snip ]
> >>>Looks good.  This is the approved way to handle multiple results of a call.
> >>>
> >>>>
> >>>>During register allocation LRA generated a weird move instruction:
> >>>>
> >>>>(insn 63 0 0 (set (reg/f:DI 100)
> >>>>          (parallel [
> >>>>                  (expr_list:REG_DEP_TRUE (reg:DI 0 ax)
> >>>>                      (const_int 0 [0]))
> >>>>                  (expr_list:REG_DEP_TRUE (reg:BND64 77 bnd0)
> >>>>                      (const_int 0 [0]))
> >>>>                  (expr_list:REG_DEP_TRUE (reg:BND64 78 bnd1)
> >>>>                      (const_int 0 [0]))
> >>>>              ])) -1
> >>>>       (nil))
> >>>>
> >>>>Which caused ICE later in LRA.  This move happens because of
> >>>>REG_RETURNED (reg/f:DI 100) (see condition in inherit_in_ebb at
> >>>>lra-constraints.c:5312).  Thus this code in LRA doesn't accept
> >>>>PARALLEL dest for calls.
> >>>
> >>>This is a bug in LRA then.  Multiple return values aren't heavily used, so
> >>>I'm not surprised that its handling was missed in LRA.
> >>>
> >>>The question now is how to bundle things together in such a way as to make
> >>>it easy for Vlad to reproduce and fix this in LRA.
> >>>
> >>>Jeff
> >>
> >>I suppose it should be easy to reproduce using the same test case I
> >>use and some pseudo patch which adds fake return values (e.g. xmm6 and
> >>xmm7) to calls.  Will try to make a minimal patch and test Vlad
> >>could work with.
> >>
> >>Ilya
> >
> >I couldn't reproduce the problem on a small test but the chrome build
> >shows a lot of errors.  Due to the nature of the problem the test's
> >size shouldn't matter, so I attach a patch which emulates the situation
> >with bounds regs (but uses xmm5 and xmm6 instead of bnd0 and bnd1)
> >together with a preprocessed chrome file.
> >
> 
> The problem is in code introduced by Bernd in IRA and caller-save.c
> in 2012.  It is basically an optimization for functions that always
> return the same value as one of their arguments (e.g. memcpy
> returning its 1st argument).
> 
> There are two possible solutions.  The first one is to prohibit the
> optimization when there is a parallel in the SET.  The second one is
> to go deeper if the call result is guaranteed to be in the first
> element, which is true for the patch.
> 
> For the first solution, the patch would be
> 
> Index: lra-constraints.c
> ===================================================================
> --- lra-constraints.c   (revision 215748)
> +++ lra-constraints.c   (working copy)
> @@ -5348,16 +5348,19 @@
>                   if (GET_CODE (pat) == PARALLEL)
>                     pat = XVECEXP (pat, 0, 0);
>                   dest = SET_DEST (pat);
> -                 start_sequence ();
> -                 emit_move_insn (cheap, copy_rtx (dest));
> -                 restore = get_insns ();
> -                 end_sequence ();
> -                 lra_process_new_insns (curr_insn, NULL, restore,
> -                                        "Inserting call parameter
> restore");
> -                 /* We don't need to save/restore of the pseudo from
> -                    this call.  */
> -                 usage_insns[regno].calls_num = calls_num;
> -                 bitmap_set_bit (&check_only_regs, regno);
> +                 if (REG_P (dest))
> +                   {
> +                     start_sequence ();
> +                     emit_move_insn (cheap, copy_rtx (dest));
> +                     restore = get_insns ();
> +                     end_sequence ();
> +                     lra_process_new_insns (curr_insn, NULL, restore,
> +                                            "Inserting call parameter restore");
> +                     /* We don't need to save/restore of the pseudo
> +                        from this call.  */
> +                     usage_insns[regno].calls_num = calls_num;
> +                     bitmap_set_bit (&check_only_regs, regno);
> +                   }
>                 }
>             }
>           to_inherit_num = 0;
> 
> 
> For the second solution, the patch is
> 
> 
> Index: lra-constraints.c
> ===================================================================
> --- lra-constraints.c   (revision 215748)
> +++ lra-constraints.c   (working copy)
> @@ -5348,16 +5348,25 @@
>                   if (GET_CODE (pat) == PARALLEL)
>                     pat = XVECEXP (pat, 0, 0);
>                   dest = SET_DEST (pat);
> -                 start_sequence ();
> -                 emit_move_insn (cheap, copy_rtx (dest));
> -                 restore = get_insns ();
> -                 end_sequence ();
> -                 lra_process_new_insns (curr_insn, NULL, restore,
> -                                        "Inserting call parameter
> restore");
> -                 /* We don't need to save/restore of the pseudo from
> -                    this call.  */
> -                 usage_insns[regno].calls_num = calls_num;
> -                 bitmap_set_bit (&check_only_regs, regno);
> +                 if (GET_CODE (dest) == PARALLEL)
> +                   {
> +                     dest = XVECEXP (dest, 0, 0);
> +                     if (GET_CODE (dest) == EXPR_LIST)
> +                       dest = XEXP (dest, 0);
> +                   }
> +                 if (REG_P (dest))
> +                   {
> +                     start_sequence ();
> +                     emit_move_insn (cheap, copy_rtx (dest));
> +                     restore = get_insns ();
> +                     end_sequence ();
> +                     lra_process_new_insns (curr_insn, NULL, restore,
> +                                            "Inserting call parameter restore");
> +                     /* We don't need to save/restore of the pseudo from
> +                        this call.      */
> +                     usage_insns[regno].calls_num = calls_num;
> +                     bitmap_set_bit (&check_only_regs, regno);
> +                   }
>                 }
>             }
> 
> 
> The first patch is safer but the second one is ok too.  I have no
> particular preference.  Whatever we choose, the analogous code in
> caller-save.c should be changed too.
> 

Thanks for the patches!  I decided to use the safest way here for now.  I'll post the patch I used in a separate thread.
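
For reference, a minimal sketch (mine, not from the thread) of the optimization these patches restrict: when a callee is known to always return one of its arguments, LRA can avoid saving/restoring a pseudo that lives across the call and instead re-load it from the call's return value via the "call parameter restore" move shown above.

#include <string.h>

/* memcpy always returns its first argument, so a pseudo holding DST that
   survives the call does not need to be spilled around it; LRA may
   re-materialize it from the returned value instead.  */
void
fill (char *dst, const char *src, unsigned long n)
{
  char *p = dst;          /* pseudo live across the call */
  memcpy (dst, src, n);   /* memcpy returns dst in the result register */
  p[0] = 0;               /* p can be restored from the return value */
}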

Below is my current patch for the MPX ABI.  It still has one ICE in LRA during SPEC2006 compilation.  Unfortunately it cannot be reproduced using my patch with fake usages.  I'm trying to find out the root cause of the problem, but it seems to be deeper than the previous one.  Will you be able to look at it using the mpx GCC branch (I need to update it with the latest sources first)?

Here is how this problem looks:

expmed.c:1551:1: internal compiler error: in copy_rtx, at rtl.c:356
 }
 ^
0xbc4e86 copy_rtx(rtx_def*)
        ../../gcc-pl/gcc/rtl.c:356
0xac6b6d remove_pseudos
        ../../gcc-pl/gcc/lra-spills.c:440
0xac6be5 remove_pseudos
        ../../gcc-pl/gcc/lra-spills.c:449
0xac6be5 remove_pseudos
        ../../gcc-pl/gcc/lra-spills.c:449
0xac6be5 remove_pseudos
        ../../gcc-pl/gcc/lra-spills.c:449
0xac6be5 remove_pseudos
        ../../gcc-pl/gcc/lra-spills.c:449
0xac6be5 remove_pseudos
        ../../gcc-pl/gcc/lra-spills.c:449
0xac6be5 remove_pseudos
        ../../gcc-pl/gcc/lra-spills.c:449
0xac6be5 remove_pseudos
        ../../gcc-pl/gcc/lra-spills.c:449
0xac6be5 remove_pseudos
        ../../gcc-pl/gcc/lra-spills.c:449
0xac6ebd spill_pseudos
        ../../gcc-pl/gcc/lra-spills.c:527
0xac7338 lra_spill()
        ../../gcc-pl/gcc/lra-spills.c:600
0xaa6085 lra(_IO_FILE*)
        ../../gcc-pl/gcc/lra.c:2272
0xa56aff do_reload
        ../../gcc-pl/gcc/ira.c:5311
0xa56e44 execute
        ../../gcc-pl/gcc/ira.c:5470
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See <http://gcc.gnu.org/bugs.html> for instructions.

In a debugger I see:

Program received signal SIGSEGV, Segmentation fault.
0x0000000000ac6b04 in remove_pseudos (loc=0x7ffff7f58b18, insn=0x7ffff6fdc7e0) at ../../gcc-pl/gcc/lra-spills.c:439
439                                             false, false, true);
Missing separate debuginfos, use: debuginfo-install glibc-2.17-20.fc19.x86_64 gmp-5.1.1-2.fc19.x86_64 libmpc-1.0.1-1.fc19.x86_64 mpfr-3.1.1-2.fc19.x86_64
(gdb) l
434             *loc = copy_rtx (hard_reg);
435           else
436             {
437               rtx x = lra_eliminate_regs_1 (insn, pseudo_slots[i].mem,
438                                             GET_MODE (pseudo_slots[i].mem),
439                                             false, false, true);
440               *loc = x != pseudo_slots[i].mem ? x : copy_rtx (x);
441             }
442           return;
443         }
(gdb) p *loc
$1 = (rtx) 0x7ffff7f58ae0
(gdb) pr
warning: Expression is not an assignment (and might have no effect)
(reg/f:SI 814)
(gdb) p pseudo_slots[i]
$2 = {slot_num = 0, next = 0x0, first = 0x24952d8, mem = 0x8}
(gdb) p insn
$3 = (rtx_insn *) 0x7ffff6fdc7e0
(gdb) pr
warning: Expression is not an assignment (and might have no effect)
(call_insn 1299 1298 1300 110 (set (parallel [
                (expr_list:REG_DEP_TRUE (reg:SI 0 ax)
                    (const_int 0 [0]))
                (expr_list:REG_DEP_TRUE (reg:BND64 77 bnd0)
                    (const_int 0 [0]))
                (expr_list:REG_DEP_TRUE (reg:BND64 78 bnd1)
                    (const_int 0 [0]))
            ])
        (call/j (mem:QI (symbol_ref:SI ("adjust_address_1") [flags 0x41] <function_decl 0x7ffff7022438 adjust_address_1.chkp>) [0 adjust_address_1.chkp S1 A8])
            (const_int 32 [0x20]))) expmed.c:1321 660 {*call_value}
     (expr_list:REG_UNUSED (reg:BND64 78 bnd1)
        (expr_list:REG_CALL_DECL (symbol_ref:SI ("adjust_address_1") [flags 0x41] <function_decl 0x7ffff7022438 adjust_address_1.chkp>)
            (nil)))
    (expr_list:SI (use (mem/f:SI (reg/f:SI 814) [0  S4 A32]))
        (expr_list:SI (use (mem:SI (plus:SI (reg/f:SI 814)
                        (const_int 4 [0x4])) [0  S4 A32]))
            (expr_list:DI (use (mem:DI (plus:SI (reg/f:SI 814)
                            (const_int 8 [0x8])) [0  S8 A32]))
                (expr_list:SI (use (mem:SI (plus:SI (reg/f:SI 814)
                                (const_int 16 [0x10])) [0  S4 A32]))
                    (expr_list:SI (use (mem:SI (plus:SI (reg/f:SI 814)
                                    (const_int 20 [0x14])) [0  S4 A32]))
                        (nil)))))))


pseudo_slots[i].mem doesn't look like anything LRA expects here.  r814 is just a copy of the stack pointer made before the preparation of function arguments on the stack.

Perhaps you can guess where the problem might be?

Thanks,
Ilya
--
2014-10-02  Ilya Enkovich  <ilya.enkovich@intel.com>

	* config/i386/i386.c (ix86_option_override_internal): Do not
	support x32 with MPX.
	(init_cumulative_args): Init stdarg, bnd_regno, bnds_in_bt
	and force_bnd_pass.
	(function_arg_advance_32): Return number of used integer
	registers.
	(function_arg_advance_64): Likewise.
	(function_arg_advance_ms_64): Likewise.
	(ix86_function_arg_advance): Handle pointer bounds.
	(ix86_function_arg): Likewise.
	(ix86_function_value_regno_p): Mark first bounds register as
	possible function value.
	(ix86_function_value_1): Handle pointer bounds type/mode.
	(ix86_return_in_memory): Likewise.
	(ix86_print_operand): Analyse insn to decide about "bnd" prefix.
	(ix86_expand_call): Generate returned bounds.
	(ix86_bnd_prefixed_insn_p): Check if we have instrumented call
	or function.
	* config/i386/i386.h (ix86_args): Add bnd_regno, bnds_in_bt,
	force_bnd_pass and stdarg fields.
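
To illustrate what the ABI above means for a simple instrumented function (a hand-written sketch with a made-up function name, not compiler output): bounds of pointer arguments passed in registers are assigned to the bound registers in order starting from the first one, bounds of a returned pointer are expected back in the first bound register, and bounds that do not fit in bound registers or belong to pointers passed in memory go through the Bounds Tables.

/* Assuming -mmpx and Pointer Bounds Checker instrumentation: bounds of P are
   expected in %bnd0 and bounds of Q in %bnd1, while the pointers themselves
   use the normal integer argument registers; the bounds of the returned
   pointer come back in %bnd0.  */
char *
pick (char *p, char *q, int use_first)
{
  return use_first ? p : q;
}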

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 2dca37c..7576735 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -3727,6 +3727,9 @@  ix86_option_override_internal (bool main_args_p,
   if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
     error ("Intel MPX does not support x32");
 
+  if (TARGET_X32 && (ix86_isa_flags & OPTION_MASK_ISA_MPX))
+    error ("Intel MPX does not support x32");
+
   if (!strcmp (opts->x_ix86_arch_string, "generic"))
     error ("generic CPU can be used only for %stune=%s %s",
 	   prefix, suffix, sw);
@@ -6216,10 +6219,15 @@  init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
      FIXME: once typesytem is fixed, we won't need this code anymore.  */
   if (i && i->local && i->can_change_signature)
     fntype = TREE_TYPE (fndecl);
+  cum->stdarg = stdarg_p (fntype);
   cum->maybe_vaarg = (fntype
 		      ? (!prototype_p (fntype) || stdarg_p (fntype))
 		      : !libname);
 
+  cum->bnd_regno = FIRST_BND_REG;
+  cum->bnds_in_bt = 0;
+  cum->force_bnd_pass = 0;
+
   if (!TARGET_64BIT)
     {
       /* If there are variable arguments, then we won't pass anything
@@ -7154,13 +7162,17 @@  construct_container (enum machine_mode mode, enum machine_mode orig_mode,
 
 /* Update the data in CUM to advance over an argument of mode MODE
    and data type TYPE.  (TYPE is null for libcalls where that information
-   may not be available.)  */
+   may not be available.)
 
-static void
+   Return the number of integer registers advanced over.  */
+
+static int
 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
 			 const_tree type, HOST_WIDE_INT bytes,
 			 HOST_WIDE_INT words)
 {
+  int res = 0;
+
   switch (mode)
     {
     default:
@@ -7178,7 +7190,8 @@  function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
       cum->words += words;
       cum->nregs -= words;
       cum->regno += words;
-
+      if (cum->nregs >= 0)
+	res = words;
       if (cum->nregs <= 0)
 	{
 	  cum->nregs = 0;
@@ -7249,9 +7262,11 @@  function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
 	}
       break;
     }
+
+  return res;
 }
 
-static void
+static int
 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
 			 const_tree type, HOST_WIDE_INT words, bool named)
 {
@@ -7260,7 +7275,7 @@  function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
   /* Unnamed 512 and 256bit vector mode parameters are passed on stack.  */
   if (!named && (VALID_AVX512F_REG_MODE (mode)
 		 || VALID_AVX256_REG_MODE (mode)))
-    return;
+    return 0;
 
   if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
       && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
@@ -7269,16 +7284,18 @@  function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
       cum->sse_nregs -= sse_nregs;
       cum->regno += int_nregs;
       cum->sse_regno += sse_nregs;
+      return int_nregs;
     }
   else
     {
       int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
       cum->words = (cum->words + align - 1) & ~(align - 1);
       cum->words += words;
+      return 0;
     }
 }
 
-static void
+static int
 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
 			    HOST_WIDE_INT words)
 {
@@ -7290,7 +7307,9 @@  function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
     {
       cum->nregs -= 1;
       cum->regno += 1;
+      return 1;
     }
+  return 0;
 }
 
 /* Update the data in CUM to advance over an argument of mode MODE and
@@ -7303,6 +7322,7 @@  ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
 {
   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
   HOST_WIDE_INT bytes, words;
+  int nregs;
 
   if (mode == BLKmode)
     bytes = int_size_in_bytes (type);
@@ -7313,12 +7333,51 @@  ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
   if (type)
     mode = type_natural_mode (type, NULL, false);
 
+  if ((type && POINTER_BOUNDS_TYPE_P (type))
+      || POINTER_BOUNDS_MODE_P (mode))
+    {
+      /* If we pass bounds in BT then just update the remaining bounds count.  */
+      if (cum->bnds_in_bt)
+	{
+	  cum->bnds_in_bt--;
+	  return;
+	}
+
+      /* Update the remaining number of bounds to force.  */
+      if (cum->force_bnd_pass)
+	cum->force_bnd_pass--;
+
+      cum->bnd_regno++;
+
+      return;
+    }
+
+  /* The first arg not going to Bounds Tables resets this counter.  */
+  cum->bnds_in_bt = 0;
+  /* For unnamed args we always pass bounds to avoid bounds mess when
+     passed and received types do not match.  If bounds do not follow
+     unnamed arg, still pretend required number of bounds were passed.  */
+  if (cum->force_bnd_pass)
+    {
+      cum->bnd_regno += cum->force_bnd_pass;
+      cum->force_bnd_pass = 0;
+    }
+
   if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
-    function_arg_advance_ms_64 (cum, bytes, words);
+    nregs = function_arg_advance_ms_64 (cum, bytes, words);
   else if (TARGET_64BIT)
-    function_arg_advance_64 (cum, mode, type, words, named);
+    nregs = function_arg_advance_64 (cum, mode, type, words, named);
   else
-    function_arg_advance_32 (cum, mode, type, bytes, words);
+    nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
+
+  /* For stdarg we expect bounds to be passed for each value passed
+     in register.  */
+  if (cum->stdarg)
+    cum->force_bnd_pass = nregs;
+  /* For pointers passed in memory we expect bounds passed in Bounds
+     Table.  */
+  if (!nregs)
+    cum->bnds_in_bt = chkp_type_bounds_count (type);
 }
 
 /* Define where to put the arguments to a function.
@@ -7553,6 +7612,23 @@  ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
   HOST_WIDE_INT bytes, words;
   rtx arg;
 
+  /* All pointer bounds arguments are handled separately here.  */
+  if ((type && POINTER_BOUNDS_TYPE_P (type))
+      || POINTER_BOUNDS_MODE_P (mode))
+    {
+      /* Return NULL if bounds are forced to go in Bounds Table.  */
+      if (cum->bnds_in_bt)
+	arg = NULL;
+      /* Return the next available bound reg if any.  */
+      else if (cum->bnd_regno <= LAST_BND_REG)
+	arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
+      /* Return the next special slot number otherwise.  */
+      else
+	arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
+
+      return arg;
+    }
+
   if (mode == BLKmode)
     bytes = int_size_in_bytes (type);
   else
@@ -7826,6 +7902,9 @@  ix86_function_value_regno_p (const unsigned int regno)
     case SI_REG:
       return TARGET_64BIT && ix86_abi != MS_ABI;
 
+    case FIRST_BND_REG:
+      return chkp_function_instrumented_p (current_function_decl);
+
       /* Complex values are returned in %st(0)/%st(1) pair.  */
     case ST0_REG:
     case ST1_REG:
@@ -8002,7 +8081,10 @@  ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
     fn = fntype_or_decl;
   fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
 
-  if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
+  if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
+      || POINTER_BOUNDS_MODE_P (mode))
+    return gen_rtx_REG (BNDmode, FIRST_BND_REG);
+  else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
     return function_value_ms_64 (orig_mode, mode, valtype);
   else if (TARGET_64BIT)
     return function_value_64 (orig_mode, mode, valtype);
@@ -8111,6 +8193,9 @@  ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
   const enum machine_mode mode = type_natural_mode (type, NULL, true);
   HOST_WIDE_INT size;
 
+  if (POINTER_BOUNDS_TYPE_P (type))
+    return false;
+
   if (TARGET_64BIT)
     {
       if (ix86_function_type_abi (fntype) == MS_ABI)
@@ -15411,7 +15496,7 @@  ix86_print_operand (FILE *file, rtx x, int code)
 	  return;
 
 	case '!':
-	  if (ix86_bnd_prefixed_insn_p (NULL_RTX))
+	  if (ix86_bnd_prefixed_insn_p (current_output_insn))
 	    fputs ("bnd ", file);
 	  return;
 
@@ -25009,8 +25094,21 @@  ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
     }
 
   call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
+
   if (retval)
-    call = gen_rtx_SET (VOIDmode, retval, call);
+    {
+      /* We should add bounds as destination register in case
+	 pointer with bounds may be returned.  */
+      if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
+	{
+	  rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
+	  rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
+	  retval = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, retval, b0, b1));
+	  chkp_put_regs_to_expr_list (retval);
+	}
+
+      call = gen_rtx_SET (VOIDmode, retval, call);
+    }
   vec[vec_len++] = call;
 
   if (pop)
@@ -46233,9 +46331,18 @@  ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
    bnd by default for current function.  */
 
 bool
-ix86_bnd_prefixed_insn_p (rtx insn ATTRIBUTE_UNUSED)
+ix86_bnd_prefixed_insn_p (rtx insn)
 {
-  return false;
+  /* For call insns check special flag.  */
+  if (insn && CALL_P (insn))
+    {
+      rtx call = get_call_rtx_from (insn);
+      if (call)
+	return CALL_EXPR_WITH_BOUNDS_P (call);
+    }
+
+  /* All other insns are prefixed only if function is instrumented.  */
+  return chkp_function_instrumented_p (current_function_decl);
 }
 
 /* Calculate integer abs() using only SSE2 instructions.  */
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 8f4897c..81de1f9 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1661,6 +1661,10 @@  typedef struct ix86_args {
   int float_in_sse;		/* Set to 1 or 2 for 32bit targets if
 				   SFmode/DFmode arguments should be passed
 				   in SSE registers.  Otherwise 0.  */
+  int bnd_regno;                /* next available bnd register number */
+  int bnds_in_bt;               /* number of bounds expected in BT.  */
+  int force_bnd_pass;           /* number of bounds expected for stdarg arg.  */
+  int stdarg;                   /* Set to 1 if function is stdarg.  */
   enum calling_abi call_abi;	/* Set to SYSV_ABI for sysv abi. Otherwise
  				   MS_ABI for ms abi.  */
 } CUMULATIVE_ARGS;