diff mbox

[PATCHv2,4/7,ARM,V8M] ARMv8-M Security Extension's cmse_nonsecure_entry: clear registers

Message ID 5811D039.2030401@arm.com
State New
Headers show

Commit Message

Andre Vieira (lists) Oct. 27, 2016, 10 a.m. UTC
On 26/10/16 17:30, Kyrill Tkachov wrote:
> 
> On 26/10/16 17:26, Andre Vieira (lists) wrote:
>> On 26/10/16 13:51, Kyrill Tkachov wrote:
>>> Hi Andre,
>>>
>>> On 25/10/16 17:29, Andre Vieira (lists) wrote:
>>>> On 24/08/16 12:01, Andre Vieira (lists) wrote:
>>>>> On 25/07/16 14:23, Andre Vieira (lists) wrote:
>>>>>> This patch extends support for the ARMv8-M Security Extensions
>>>>>> 'cmse_nonsecure_entry' attribute to safeguard against leak of
>>>>>> information through unbanked registers.
>>>>>>
>>>>>> When returning from a nonsecure entry function we clear all
>>>>>> caller-saved
>>>>>> registers that are not used to pass return values, by writing either
>>>>>> the
>>>>>> LR, in case of general purpose registers, or the value 0, in case
>>>>>> of FP
>>>>>> registers. We use the LR to write to APSR and FPSCR too. We
>>>>>> currently do
>>>>>> not support entry functions that pass arguments or return
>>>>>> variables on
>>>>>> the stack and we diagnose this. This patch relies on the existing
>>>>>> code
>>>>>> to make sure callee-saved registers used in cmse_nonsecure_entry
>>>>>> functions are saved and restored thus retaining their nonsecure mode
>>>>>> value, this should be happening already as it is required by AAPCS.
>>>>>>
>>>>>> This patch also clears padding bits for cmse_nonsecure_entry
>>>>>> functions
>>>>>> with struct and union return types. For unions a bit is only
>>>>>> considered
>>>>>> a padding bit if it is an unused bit in every field of that union.
>>>>>> The
>>>>>> function that calculates these is used in a later patch to do the
>>>>>> same
>>>>>> for arguments of cmse_nonsecure_call's.
>>>>>>
>>>>>> *** gcc/ChangeLog ***
>>>>>> 2016-07-25  Andre Vieira        <andre.simoesdiasvieira@arm.com>
>>>>>>               Thomas Preud'homme  <thomas.preudhomme@arm.com>
>>>>>>
>>>>>>           * config/arm/arm.c (output_return_instruction): Clear
>>>>>>           registers.
>>>>>>           (thumb2_expand_return): Likewise.
>>>>>>           (thumb1_expand_epilogue): Likewise.
>>>>>>           (thumb_exit): Likewise.
>>>>>>           (arm_expand_epilogue): Likewise.
>>>>>>           (cmse_nonsecure_entry_clear_before_return): New.
>>>>>>           (comp_not_to_clear_mask_str_un): New.
>>>>>>           (compute_not_to_clear_mask): New.
>>>>>>           * config/arm/thumb1.md (*epilogue_insns): Change length
>>>>>> attribute.
>>>>>>           * config/arm/thumb2.md (*thumb2_return): Likewise.
>>>>>>
>>>>>> *** gcc/testsuite/ChangeLog ***
>>>>>> 2016-07-25  Andre Vieira        <andre.simoesdiasvieira@arm.com>
>>>>>>               Thomas Preud'homme  <thomas.preudhomme@arm.com>
>>>>>>
>>>>>>           * gcc.target/arm/cmse/cmse.exp: Test different multilibs
>>>>>> separate.
>>>>>>           * gcc.target/arm/cmse/struct-1.c: New.
>>>>>>           * gcc.target/arm/cmse/bitfield-1.c: New.
>>>>>>           * gcc.target/arm/cmse/bitfield-2.c: New.
>>>>>>           * gcc.target/arm/cmse/bitfield-3.c: New.
>>>>>>           * gcc.target/arm/cmse/baseline/cmse-2.c: Test that
>>>>>> registers are
>>>>>> cleared.
>>>>>>           * gcc.target/arm/cmse/mainline/soft/cmse-5.c: New.
>>>>>>           * gcc.target/arm/cmse/mainline/hard/cmse-5.c: New.
>>>>>>           * gcc.target/arm/cmse/mainline/hard-sp/cmse-5.c: New.
>>>>>>           * gcc.target/arm/cmse/mainline/softfp/cmse-5.c: New.
>>>>>>           * gcc.target/arm/cmse/mainline/softfp-sp/cmse-5.c: New.
>>>>>>
>>>>> Updated this patch to correctly clear only the cumulative
>>>>> exception-status (0-4,7) and the condition code bits (28-31) of the
>>>>> FPSCR. I also adapted the code to handle the bigger floating point
>>>>> register files.
>>>>>
>>>>> ----
>>>>>
>>>>> This patch extends support for the ARMv8-M Security Extensions
>>>>> 'cmse_nonsecure_entry' attribute to safeguard against leak of
>>>>> information through unbanked registers.
>>>>>
>>>>> When returning from a nonsecure entry function we clear all
>>>>> caller-saved
>>>>> registers that are not used to pass return values, by writing
>>>>> either the
>>>>> LR, in case of general purpose registers, or the value 0, in case
>>>>> of FP
>>>>> registers. We use the LR to write to APSR. For FPSCR we clear only the
>>>>> cumulative exception-status (0-4, 7) and the condition code bits
>>>>> (28-31). We currently do not support entry functions that pass
>>>>> arguments
>>>>> or return variables on the stack and we diagnose this. This patch
>>>>> relies
>>>>> on the existing code to make sure callee-saved registers used in
>>>>> cmse_nonsecure_entry functions are saved and restored thus retaining
>>>>> their nonsecure mode value, this should be happening already as it is
>>>>> required by AAPCS.
>>>>>
>>>>> This patch also clears padding bits for cmse_nonsecure_entry functions
>>>>> with struct and union return types. For unions a bit is only
>>>>> considered
>>>>> a padding bit if it is an unused bit in every field of that union. The
>>>>> function that calculates these is used in a later patch to do the same
>>>>> for arguments of cmse_nonsecure_call's.
>>>>>
>>>>> *** gcc/ChangeLog ***
>>>>> 2016-07-xx  Andre Vieira        <andre.simoesdiasvieira@arm.com>
>>>>>               Thomas Preud'homme  <thomas.preudhomme@arm.com>
>>>>>
>>>>>           * config/arm/arm.c (output_return_instruction): Clear
>>>>>           registers.
>>>>>           (thumb2_expand_return): Likewise.
>>>>>           (thumb1_expand_epilogue): Likewise.
>>>>>           (thumb_exit): Likewise.
>>>>>           (arm_expand_epilogue): Likewise.
>>>>>           (cmse_nonsecure_entry_clear_before_return): New.
>>>>>           (comp_not_to_clear_mask_str_un): New.
>>>>>           (compute_not_to_clear_mask): New.
>>>>>           * config/arm/thumb1.md (*epilogue_insns): Change length
>>>>> attribute.
>>>>>           * config/arm/thumb2.md (*thumb2_return): Duplicate
>>>>> pattern for
>>>>>           cmse_nonsecure_entry functions.
>>>>>
>>>>> *** gcc/testsuite/ChangeLog ***
>>>>> 2016-07-xx  Andre Vieira        <andre.simoesdiasvieira@arm.com>
>>>>>               Thomas Preud'homme  <thomas.preudhomme@arm.com>
>>>>>
>>>>>           * gcc.target/arm/cmse/cmse.exp: Test different multilibs
>>>>> separate.
>>>>>           * gcc.target/arm/cmse/struct-1.c: New.
>>>>>           * gcc.target/arm/cmse/bitfield-1.c: New.
>>>>>           * gcc.target/arm/cmse/bitfield-2.c: New.
>>>>>           * gcc.target/arm/cmse/bitfield-3.c: New.
>>>>>           * gcc.target/arm/cmse/baseline/cmse-2.c: Test that registers
>>>>> are
>>>>> cleared.
>>>>>           * gcc.target/arm/cmse/mainline/soft/cmse-5.c: New.
>>>>>           * gcc.target/arm/cmse/mainline/hard/cmse-5.c: New.
>>>>>           * gcc.target/arm/cmse/mainline/hard-sp/cmse-5.c: New.
>>>>>           * gcc.target/arm/cmse/mainline/softfp/cmse-5.c: New.
>>>>>           * gcc.target/arm/cmse/mainline/softfp-sp/cmse-5.c: New.
>>>>>
>>>> Hi,
>>>>
>>>> Rebased previous patch on top of trunk as requested. No changes to
>>>> ChangeLog.
>>>>
>>>> Cheers,
>>>> Andre
>>> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
>>> index
>>> bb81e5662e81a26c7d3ccf9f749e8e356e6de35e..c6260323ecfd2f2842e6a5aab06b67da16619c73
>>>
>>> 100644
>>> --- a/gcc/config/arm/arm.c
>>> +++ b/gcc/config/arm/arm.c
>>> @@ -17496,6 +17496,279 @@ note_invalid_constants (rtx_insn *insn,
>>> HOST_WIDE_INT address, int do_pushes)
>>>     return;
>>>   }
>>>   +/* This function computes the clear mask and PADDING_BITS_TO_CLEAR
>>> for
>>> structs
>>> +   and unions in the context of ARMv8-M Security Extensions.  It is
>>> used as a
>>> +   helper function for both 'cmse_nonsecure_call' and
>>> 'cmse_nonsecure_entry'
>>> +   functions.  The PADDING_BITS_TO_CLEAR pointer can be the base to
>>> either one
>>> +   or four masks, depending on whether it is being computed for a
>>> +   'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call'
>>> argument
>>> +   respectively.  The tree for the type of the argument or a field
>>> within an
>>> +   argument is passed in ARG_TYPE, the current register this argument
>>> or field
>>> +   starts in is kept in the pointer REGNO and updated accordingly, the
>>> bit this
>>> +   argument or field starts at is passed in STARTING_BIT and the last
>>> used bit
>>> +   is kept in LAST_USED_BIT which is also updated accordingly.  */
>>> +
>>> +static unsigned HOST_WIDE_INT
>>> +comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
>>> +                   uint32_t * padding_bits_to_clear,
>>> +                   unsigned starting_bit, int * last_used_bit)
>>> +
>>> +{
>>> +  unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
>>> +
>>> +  if (TREE_CODE (arg_type) == RECORD_TYPE)
>>> +    {
>>> +      unsigned current_bit = starting_bit;
>>> +      tree field;
>>> +      long int offset, size;
>>> +
>>> +
>>> +      field = TYPE_FIELDS (arg_type);
>>> +      while (field)
>>> +    {
>>> +      /* The offset within a structure is always an offset from
>>> +         the start of that structure.  Make sure we take that into the
>>> +         calculation of the register based offset that we use here.  */
>>> +      offset = starting_bit;
>>> +      offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
>>> +      offset %= 32;
>>> +
>>> +      /* This is the actual size of the field, for bitfields this is
>>> the
>>> +         bitfield width and not the container size.  */
>>> +      size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
>>> +
>>> +      if (*last_used_bit != offset)
>>> +        {
>>> +          if (offset < *last_used_bit)
>>> +        {
>>> +          /* This field's offset is before the 'last_used_bit', that
>>> +             means this field goes on the next register.  So we need to
>>> +             pad the rest of the current register and increase the
>>> +             register number.  */
>>> +          uint32_t mask;
>>> +          mask  = UINT32_MAX - ((uint32_t) 1 << *last_used_bit);
>>> +          mask++;
>>> +
>>> +          *(padding_bits_to_clear + *regno) |= mask;
>>>
>>> padding_bits_to_clear[*regno] |= mask;
>>>
>>> +          not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
>>> +          (*regno)++;
>>> +        }
>>> +          else
>>> +        {
>>> +          /* Otherwise we pad the bits between the last field's end and
>>> +             the start of the new field.  */
>>> +          uint32_t mask;
>>> +
>>> +          mask = UINT32_MAX >> (32 - offset);
>>> +          mask -= ((uint32_t) 1 << *last_used_bit) - 1;
>>> +          *(padding_bits_to_clear + *regno) |= mask;
>>>
>>> Likewise.
>>>
>>> +        }
>>> +          current_bit = offset;
>>> +        }
>>> +
>>> +      /* Calculate further padding bits for inner structs/unions
>>> too.  */
>>> +      if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
>>> +        {
>>> +          *last_used_bit = current_bit;
>>> +          not_to_clear_reg_mask
>>> +        |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
>>> +                          padding_bits_to_clear, offset,
>>> +                          last_used_bit);
>>> +        }
>>> +      else
>>> +        {
>>> +          /* Update 'current_bit' with this field's size.  If the
>>> +         'current_bit' lies in a subsequent register, update 'regno'
>>> and
>>> +         reset 'current_bit' to point to the current bit in that new
>>> +         register.  */
>>> +          current_bit += size;
>>> +          while (current_bit >= 32)
>>> +        {
>>> +          current_bit-=32;
>>> +          not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
>>> +          (*regno)++;
>>> +        }
>>> +          *last_used_bit = current_bit;
>>> +        }
>>> +
>>> +      field = TREE_CHAIN (field);
>>> +    }
>>> +      not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
>>> +    }
>>> +  else if (TREE_CODE (arg_type) == UNION_TYPE)
>>> +    {
>>> +      tree field, field_t;
>>> +      int i, regno_t, field_size;
>>> +      int max_reg = -1;
>>> +      int max_bit = -1;
>>> +      uint32_t mask;
>>> +      uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
>>> +    = {UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX};
>>> +
>>> +      /* To compute the padding bits in a union we only consider
>>> bits as
>>> +     padding bits if they are always either a padding bit or fall
>>> outside a
>>> +     fields size for all fields in the union.  */
>>> +      field = TYPE_FIELDS (arg_type);
>>> +      while (field)
>>> +    {
>>> +      uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
>>> +        = {0U, 0U, 0U, 0U};
>>> +      int last_used_bit_t = *last_used_bit;
>>> +      regno_t = *regno;
>>> +      field_t = TREE_TYPE (field);
>>> +
>>> +      /* If the field's type is either a record or a union make sure to
>>> +         compute their padding bits too.  */
>>> +      if (RECORD_OR_UNION_TYPE_P (field_t))
>>> +        not_to_clear_reg_mask
>>> +          |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
>>> +                        &padding_bits_to_clear_t[0],
>>> +                        starting_bit, &last_used_bit_t);
>>> +      else
>>> +        {
>>> +          field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
>>> +          regno_t = (field_size / 32) + *regno;
>>> +          last_used_bit_t = (starting_bit + field_size) % 32;
>>> +        }
>>> +
>>> +      for (i = *regno; i < regno_t; i++)
>>> +        {
>>> +          /* For all but the last register used by this field only keep
>>> the
>>> +         padding bits that were padding bits in this field.  */
>>> +          padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
>>> +        }
>>> +
>>> +        /* For the last register, keep all padding bits that were
>>> padding
>>> +           bits in this field and any padding bits that are still valid
>>> +           as padding bits but fall outside of this field's size.  */
>>> +        mask = (UINT32_MAX - ((uint32_t) 1 << last_used_bit_t)) + 1;
>>> +        padding_bits_to_clear_res[regno_t]
>>> +          &= padding_bits_to_clear_t[regno_t] | mask;
>>> +
>>> +      /* Update the maximum size of the fields in terms of registers
>>> used
>>> +         ('max_reg') and the 'last_used_bit' in said register.  */
>>> +      if (max_reg < regno_t)
>>> +        {
>>> +          max_reg = regno_t;
>>> +          max_bit = last_used_bit_t;
>>> +        }
>>> +      else if (max_reg == regno_t && max_bit < last_used_bit_t)
>>> +        max_bit = last_used_bit_t;
>>> +
>>> +      field = TREE_CHAIN (field);
>>> +    }
>>> +
>>> +      /* Update the current padding_bits_to_clear using the
>>> intersection of the
>>> +     padding bits of all the fields.  */
>>> +      for (i=*regno; i < max_reg; i++)
>>> +    padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
>>> +
>>>
>>> watch the spacing in the 'for' definition.
>>>
>>>   +      /* Do not keep trailing padding bits, we do not know yet
>>> whether
>>> this
>>> +     is the end of the argument.  */
>>> +      mask = ((uint32_t) 1 << max_bit) - 1;
>>> +      padding_bits_to_clear[max_reg]
>>> +    |= padding_bits_to_clear_res[max_reg] & mask;
>>> +
>>> +      *regno = max_reg;
>>> +      *last_used_bit = max_bit;
>>> +    }
>>> +  else
>>> +    /* This function should only be used for structs and unions.  */
>>> +    gcc_unreachable ();
>>> +
>>> +  return not_to_clear_reg_mask;
>>> +}
>>> +
>>> +/* In the context of ARMv8-M Security Extensions, this function is used
>>> for both
>>> +   'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to
>>> compute what
>>> +   registers are used when returning or passing arguments, which is
>>> then
>>> +   returned as a mask.  It will also compute a mask to indicate
>>> padding/unused
>>> +   bits for each of these registers, and passes this through the
>>> +   PADDING_BITS_TO_CLEAR pointer.  The tree of the argument type is
>>> passed in
>>> +   ARG_TYPE, the rtl representation of the argument is passed in
>>> ARG_RTX and
>>> +   the starting register used to pass this argument or return value is
>>> passed
>>> +   in REGNO.  It makes use of 'comp_not_to_clear_mask_str_un' to
>>> compute these
>>> +   for struct and union types.  */
>>> +
>>> +static unsigned HOST_WIDE_INT
>>> +compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
>>> +                 uint32_t * padding_bits_to_clear)
>>> +
>>> +{
>>> +  int last_used_bit = 0;
>>> +  unsigned HOST_WIDE_INT not_to_clear_mask;
>>> +
>>> +  if (RECORD_OR_UNION_TYPE_P (arg_type))
>>> +    {
>>> +      not_to_clear_mask
>>> +    = comp_not_to_clear_mask_str_un (arg_type, &regno,
>>> +                     padding_bits_to_clear, 0,
>>> +                     &last_used_bit);
>>> +
>>> +
>>> +      /* If the 'last_used_bit' is not zero, that means we are still
>>> using a
>>> +     part of the last 'regno'.  In such cases we must clear the
>>> trailing
>>> +     bits.  Otherwise we are not using regno and we should mark it
>>> as to
>>> +     clear.  */
>>> +      if (last_used_bit != 0)
>>> +    *(padding_bits_to_clear + regno)
>>> +      |= UINT32_MAX - ((uint32_t) 1 << last_used_bit) + 1;
>>>
>>> padding_bits_to_clear[regno] |= ...
>>>
>>> +      else
>>> +    not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
>>> +    }
>>> +  else
>>> +    {
>>> +      not_to_clear_mask = 0;
>>> +      /* We are not dealing with structs nor unions.  So these
>>> arguments may be
>>> +     passed in floating point registers too.  In some cases a
>>> BLKmode is
>>> +     used when returning or passing arguments in multiple VFP
>>> registers.  */
>>> +      if (GET_MODE (arg_rtx) == BLKmode)
>>> +    {
>>> +      int i, arg_regs;
>>> +      rtx reg;
>>> +
>>> +      /* This should really only occur when dealing with the hard-float
>>> +         ABI.  */
>>> +      gcc_assert (TARGET_HARD_FLOAT_ABI);
>>> +
>>> +      for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
>>> +        {
>>> +          reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
>>> +          gcc_assert (REG_P (reg));
>>> +
>>> +          not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
>>> +
>>> +          /* If we are dealing with DF mode, make sure we don't
>>> +         clear either of the registers it addresses.  */
>>> +          arg_regs = ARM_NUM_REGS (GET_MODE (reg));
>>>
>>> Better assert here that you're indeed dealing with DFmode and/or you
>>> have 2 registers.
>>>
>> The current code actually works for larger modes too. I don't think code
>> will ever be generated with larger types, but why assert if it works
>> anyway?
> 
> ok, no need to assert here then.
> I suppose it doesn't add much if the code handles the other modes fine.
> 
> Kyrill
> 
Hi,

Reworked comments. No change to ChangeLogs.

Cheers,
Andre

Comments

Kyrill Tkachov Oct. 27, 2016, 10:44 a.m. UTC | #1
On 27/10/16 11:00, Andre Vieira (lists) wrote:
> On 26/10/16 17:30, Kyrill Tkachov wrote:
>> On 26/10/16 17:26, Andre Vieira (lists) wrote:
>>> On 26/10/16 13:51, Kyrill Tkachov wrote:
>>>> Hi Andre,
>>>>
>>>> On 25/10/16 17:29, Andre Vieira (lists) wrote:
>>>>> On 24/08/16 12:01, Andre Vieira (lists) wrote:
>>>>>> On 25/07/16 14:23, Andre Vieira (lists) wrote:
>>>>>>> This patch extends support for the ARMv8-M Security Extensions
>>>>>>> 'cmse_nonsecure_entry' attribute to safeguard against leak of
>>>>>>> information through unbanked registers.
>>>>>>>
>>>>>>> When returning from a nonsecure entry function we clear all
>>>>>>> caller-saved
>>>>>>> registers that are not used to pass return values, by writing either
>>>>>>> the
>>>>>>> LR, in case of general purpose registers, or the value 0, in case
>>>>>>> of FP
>>>>>>> registers. We use the LR to write to APSR and FPSCR too. We
>>>>>>> currently do
>>>>>>> not support entry functions that pass arguments or return
>>>>>>> variables on
>>>>>>> the stack and we diagnose this. This patch relies on the existing
>>>>>>> code
>>>>>>> to make sure callee-saved registers used in cmse_nonsecure_entry
>>>>>>> functions are saved and restored thus retaining their nonsecure mode
>>>>>>> value, this should be happening already as it is required by AAPCS.
>>>>>>>
>>>>>>> This patch also clears padding bits for cmse_nonsecure_entry
>>>>>>> functions
>>>>>>> with struct and union return types. For unions a bit is only
>>>>>>> considered
>>>>>>> a padding bit if it is an unused bit in every field of that union.
>>>>>>> The
>>>>>>> function that calculates these is used in a later patch to do the
>>>>>>> same
>>>>>>> for arguments of cmse_nonsecure_call's.
>>>>>>>
>>>>>>> *** gcc/ChangeLog ***
>>>>>>> 2016-07-25  Andre Vieira        <andre.simoesdiasvieira@arm.com>
>>>>>>>                Thomas Preud'homme  <thomas.preudhomme@arm.com>
>>>>>>>
>>>>>>>            * config/arm/arm.c (output_return_instruction): Clear
>>>>>>>            registers.
>>>>>>>            (thumb2_expand_return): Likewise.
>>>>>>>            (thumb1_expand_epilogue): Likewise.
>>>>>>>            (thumb_exit): Likewise.
>>>>>>>            (arm_expand_epilogue): Likewise.
>>>>>>>            (cmse_nonsecure_entry_clear_before_return): New.
>>>>>>>            (comp_not_to_clear_mask_str_un): New.
>>>>>>>            (compute_not_to_clear_mask): New.
>>>>>>>            * config/arm/thumb1.md (*epilogue_insns): Change length
>>>>>>> attribute.
>>>>>>>            * config/arm/thumb2.md (*thumb2_return): Likewise.
>>>>>>>
>>>>>>> *** gcc/testsuite/ChangeLog ***
>>>>>>> 2016-07-25  Andre Vieira        <andre.simoesdiasvieira@arm.com>
>>>>>>>                Thomas Preud'homme  <thomas.preudhomme@arm.com>
>>>>>>>
>>>>>>>            * gcc.target/arm/cmse/cmse.exp: Test different multilibs
>>>>>>> separate.
>>>>>>>            * gcc.target/arm/cmse/struct-1.c: New.
>>>>>>>            * gcc.target/arm/cmse/bitfield-1.c: New.
>>>>>>>            * gcc.target/arm/cmse/bitfield-2.c: New.
>>>>>>>            * gcc.target/arm/cmse/bitfield-3.c: New.
>>>>>>>            * gcc.target/arm/cmse/baseline/cmse-2.c: Test that
>>>>>>> registers are
>>>>>>> cleared.
>>>>>>>            * gcc.target/arm/cmse/mainline/soft/cmse-5.c: New.
>>>>>>>            * gcc.target/arm/cmse/mainline/hard/cmse-5.c: New.
>>>>>>>            * gcc.target/arm/cmse/mainline/hard-sp/cmse-5.c: New.
>>>>>>>            * gcc.target/arm/cmse/mainline/softfp/cmse-5.c: New.
>>>>>>>            * gcc.target/arm/cmse/mainline/softfp-sp/cmse-5.c: New.
>>>>>>>
>>>>>> Updated this patch to correctly clear only the cumulative
>>>>>> exception-status (0-4,7) and the condition code bits (28-31) of the
>>>>>> FPSCR. I also adapted the code to handle the bigger floating point
>>>>>> register files.
>>>>>>
>>>>>> ----
>>>>>>
>>>>>> This patch extends support for the ARMv8-M Security Extensions
>>>>>> 'cmse_nonsecure_entry' attribute to safeguard against leak of
>>>>>> information through unbanked registers.
>>>>>>
>>>>>> When returning from a nonsecure entry function we clear all
>>>>>> caller-saved
>>>>>> registers that are not used to pass return values, by writing
>>>>>> either the
>>>>>> LR, in case of general purpose registers, or the value 0, in case
>>>>>> of FP
>>>>>> registers. We use the LR to write to APSR. For FPSCR we clear only the
>>>>>> cumulative exception-status (0-4, 7) and the condition code bits
>>>>>> (28-31). We currently do not support entry functions that pass
>>>>>> arguments
>>>>>> or return variables on the stack and we diagnose this. This patch
>>>>>> relies
>>>>>> on the existing code to make sure callee-saved registers used in
>>>>>> cmse_nonsecure_entry functions are saved and restored thus retaining
>>>>>> their nonsecure mode value, this should be happening already as it is
>>>>>> required by AAPCS.
>>>>>>
>>>>>> This patch also clears padding bits for cmse_nonsecure_entry functions
>>>>>> with struct and union return types. For unions a bit is only
>>>>>> considered
>>>>>> a padding bit if it is an unused bit in every field of that union. The
>>>>>> function that calculates these is used in a later patch to do the same
>>>>>> for arguments of cmse_nonsecure_call's.
>>>>>>
>>>>>> *** gcc/ChangeLog ***
>>>>>> 2016-07-xx  Andre Vieira        <andre.simoesdiasvieira@arm.com>
>>>>>>                Thomas Preud'homme  <thomas.preudhomme@arm.com>
>>>>>>
>>>>>>            * config/arm/arm.c (output_return_instruction): Clear
>>>>>>            registers.
>>>>>>            (thumb2_expand_return): Likewise.
>>>>>>            (thumb1_expand_epilogue): Likewise.
>>>>>>            (thumb_exit): Likewise.
>>>>>>            (arm_expand_epilogue): Likewise.
>>>>>>            (cmse_nonsecure_entry_clear_before_return): New.
>>>>>>            (comp_not_to_clear_mask_str_un): New.
>>>>>>            (compute_not_to_clear_mask): New.
>>>>>>            * config/arm/thumb1.md (*epilogue_insns): Change length
>>>>>> attribute.
>>>>>>            * config/arm/thumb2.md (*thumb2_return): Duplicate
>>>>>> pattern for
>>>>>>            cmse_nonsecure_entry functions.
>>>>>>
>>>>>> *** gcc/testsuite/ChangeLog ***
>>>>>> 2016-07-xx  Andre Vieira        <andre.simoesdiasvieira@arm.com>
>>>>>>                Thomas Preud'homme  <thomas.preudhomme@arm.com>
>>>>>>
>>>>>>            * gcc.target/arm/cmse/cmse.exp: Test different multilibs
>>>>>> separate.
>>>>>>            * gcc.target/arm/cmse/struct-1.c: New.
>>>>>>            * gcc.target/arm/cmse/bitfield-1.c: New.
>>>>>>            * gcc.target/arm/cmse/bitfield-2.c: New.
>>>>>>            * gcc.target/arm/cmse/bitfield-3.c: New.
>>>>>>            * gcc.target/arm/cmse/baseline/cmse-2.c: Test that registers
>>>>>> are
>>>>>> cleared.
>>>>>>            * gcc.target/arm/cmse/mainline/soft/cmse-5.c: New.
>>>>>>            * gcc.target/arm/cmse/mainline/hard/cmse-5.c: New.
>>>>>>            * gcc.target/arm/cmse/mainline/hard-sp/cmse-5.c: New.
>>>>>>            * gcc.target/arm/cmse/mainline/softfp/cmse-5.c: New.
>>>>>>            * gcc.target/arm/cmse/mainline/softfp-sp/cmse-5.c: New.
>>>>>>
>>>>> Hi,
>>>>>
>>>>> Rebased previous patch on top of trunk as requested. No changes to
>>>>> ChangeLog.
>>>>>
>>>>> Cheers,
>>>>> Andre
>>>> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
>>>> index
>>>> bb81e5662e81a26c7d3ccf9f749e8e356e6de35e..c6260323ecfd2f2842e6a5aab06b67da16619c73
>>>>
>>>> 100644
>>>> --- a/gcc/config/arm/arm.c
>>>> +++ b/gcc/config/arm/arm.c
>>>> @@ -17496,6 +17496,279 @@ note_invalid_constants (rtx_insn *insn,
>>>> HOST_WIDE_INT address, int do_pushes)
>>>>      return;
>>>>    }
>>>>    +/* This function computes the clear mask and PADDING_BITS_TO_CLEAR
>>>> for
>>>> structs
>>>> +   and unions in the context of ARMv8-M Security Extensions.  It is
>>>> used as a
>>>> +   helper function for both 'cmse_nonsecure_call' and
>>>> 'cmse_nonsecure_entry'
>>>> +   functions.  The PADDING_BITS_TO_CLEAR pointer can be the base to
>>>> either one
>>>> +   or four masks, depending on whether it is being computed for a
>>>> +   'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call'
>>>> argument
>>>> +   respectively.  The tree for the type of the argument or a field
>>>> within an
>>>> +   argument is passed in ARG_TYPE, the current register this argument
>>>> or field
>>>> +   starts in is kept in the pointer REGNO and updated accordingly, the
>>>> bit this
>>>> +   argument or field starts at is passed in STARTING_BIT and the last
>>>> used bit
>>>> +   is kept in LAST_USED_BIT which is also updated accordingly.  */
>>>> +
>>>> +static unsigned HOST_WIDE_INT
>>>> +comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
>>>> +                   uint32_t * padding_bits_to_clear,
>>>> +                   unsigned starting_bit, int * last_used_bit)
>>>> +
>>>> +{
>>>> +  unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
>>>> +
>>>> +  if (TREE_CODE (arg_type) == RECORD_TYPE)
>>>> +    {
>>>> +      unsigned current_bit = starting_bit;
>>>> +      tree field;
>>>> +      long int offset, size;
>>>> +
>>>> +
>>>> +      field = TYPE_FIELDS (arg_type);
>>>> +      while (field)
>>>> +    {
>>>> +      /* The offset within a structure is always an offset from
>>>> +         the start of that structure.  Make sure we take that into the
>>>> +         calculation of the register based offset that we use here.  */
>>>> +      offset = starting_bit;
>>>> +      offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
>>>> +      offset %= 32;
>>>> +
>>>> +      /* This is the actual size of the field, for bitfields this is
>>>> the
>>>> +         bitfield width and not the container size.  */
>>>> +      size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
>>>> +
>>>> +      if (*last_used_bit != offset)
>>>> +        {
>>>> +          if (offset < *last_used_bit)
>>>> +        {
>>>> +          /* This field's offset is before the 'last_used_bit', that
>>>> +             means this field goes on the next register.  So we need to
>>>> +             pad the rest of the current register and increase the
>>>> +             register number.  */
>>>> +          uint32_t mask;
>>>> +          mask  = UINT32_MAX - ((uint32_t) 1 << *last_used_bit);
>>>> +          mask++;
>>>> +
>>>> +          *(padding_bits_to_clear + *regno) |= mask;
>>>>
>>>> padding_bits_to_clear[*regno] |= mask;
>>>>
>>>> +          not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
>>>> +          (*regno)++;
>>>> +        }
>>>> +          else
>>>> +        {
>>>> +          /* Otherwise we pad the bits between the last field's end and
>>>> +             the start of the new field.  */
>>>> +          uint32_t mask;
>>>> +
>>>> +          mask = UINT32_MAX >> (32 - offset);
>>>> +          mask -= ((uint32_t) 1 << *last_used_bit) - 1;
>>>> +          *(padding_bits_to_clear + *regno) |= mask;
>>>>
>>>> Likewise.
>>>>
>>>> +        }
>>>> +          current_bit = offset;
>>>> +        }
>>>> +
>>>> +      /* Calculate further padding bits for inner structs/unions
>>>> too.  */
>>>> +      if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
>>>> +        {
>>>> +          *last_used_bit = current_bit;
>>>> +          not_to_clear_reg_mask
>>>> +        |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
>>>> +                          padding_bits_to_clear, offset,
>>>> +                          last_used_bit);
>>>> +        }
>>>> +      else
>>>> +        {
>>>> +          /* Update 'current_bit' with this field's size.  If the
>>>> +         'current_bit' lies in a subsequent register, update 'regno'
>>>> and
>>>> +         reset 'current_bit' to point to the current bit in that new
>>>> +         register.  */
>>>> +          current_bit += size;
>>>> +          while (current_bit >= 32)
>>>> +        {
>>>> +          current_bit-=32;
>>>> +          not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
>>>> +          (*regno)++;
>>>> +        }
>>>> +          *last_used_bit = current_bit;
>>>> +        }
>>>> +
>>>> +      field = TREE_CHAIN (field);
>>>> +    }
>>>> +      not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
>>>> +    }
>>>> +  else if (TREE_CODE (arg_type) == UNION_TYPE)
>>>> +    {
>>>> +      tree field, field_t;
>>>> +      int i, regno_t, field_size;
>>>> +      int max_reg = -1;
>>>> +      int max_bit = -1;
>>>> +      uint32_t mask;
>>>> +      uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
>>>> +    = {UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX};
>>>> +
>>>> +      /* To compute the padding bits in a union we only consider
>>>> bits as
>>>> +     padding bits if they are always either a padding bit or fall
>>>> outside a
>>>> +     fields size for all fields in the union.  */
>>>> +      field = TYPE_FIELDS (arg_type);
>>>> +      while (field)
>>>> +    {
>>>> +      uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
>>>> +        = {0U, 0U, 0U, 0U};
>>>> +      int last_used_bit_t = *last_used_bit;
>>>> +      regno_t = *regno;
>>>> +      field_t = TREE_TYPE (field);
>>>> +
>>>> +      /* If the field's type is either a record or a union make sure to
>>>> +         compute their padding bits too.  */
>>>> +      if (RECORD_OR_UNION_TYPE_P (field_t))
>>>> +        not_to_clear_reg_mask
>>>> +          |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
>>>> +                        &padding_bits_to_clear_t[0],
>>>> +                        starting_bit, &last_used_bit_t);
>>>> +      else
>>>> +        {
>>>> +          field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
>>>> +          regno_t = (field_size / 32) + *regno;
>>>> +          last_used_bit_t = (starting_bit + field_size) % 32;
>>>> +        }
>>>> +
>>>> +      for (i = *regno; i < regno_t; i++)
>>>> +        {
>>>> +          /* For all but the last register used by this field only keep
>>>> the
>>>> +         padding bits that were padding bits in this field.  */
>>>> +          padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
>>>> +        }
>>>> +
>>>> +        /* For the last register, keep all padding bits that were
>>>> padding
>>>> +           bits in this field and any padding bits that are still valid
>>>> +           as padding bits but fall outside of this field's size.  */
>>>> +        mask = (UINT32_MAX - ((uint32_t) 1 << last_used_bit_t)) + 1;
>>>> +        padding_bits_to_clear_res[regno_t]
>>>> +          &= padding_bits_to_clear_t[regno_t] | mask;
>>>> +
>>>> +      /* Update the maximum size of the fields in terms of registers
>>>> used
>>>> +         ('max_reg') and the 'last_used_bit' in said register.  */
>>>> +      if (max_reg < regno_t)
>>>> +        {
>>>> +          max_reg = regno_t;
>>>> +          max_bit = last_used_bit_t;
>>>> +        }
>>>> +      else if (max_reg == regno_t && max_bit < last_used_bit_t)
>>>> +        max_bit = last_used_bit_t;
>>>> +
>>>> +      field = TREE_CHAIN (field);
>>>> +    }
>>>> +
>>>> +      /* Update the current padding_bits_to_clear using the
>>>> intersection of the
>>>> +     padding bits of all the fields.  */
>>>> +      for (i=*regno; i < max_reg; i++)
>>>> +    padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
>>>> +
>>>>
>>>> watch the spacing in the 'for' definition.
>>>>
>>>>    +      /* Do not keep trailing padding bits, we do not know yet
>>>> whether
>>>> this
>>>> +     is the end of the argument.  */
>>>> +      mask = ((uint32_t) 1 << max_bit) - 1;
>>>> +      padding_bits_to_clear[max_reg]
>>>> +    |= padding_bits_to_clear_res[max_reg] & mask;
>>>> +
>>>> +      *regno = max_reg;
>>>> +      *last_used_bit = max_bit;
>>>> +    }
>>>> +  else
>>>> +    /* This function should only be used for structs and unions.  */
>>>> +    gcc_unreachable ();
>>>> +
>>>> +  return not_to_clear_reg_mask;
>>>> +}
>>>> +
>>>> +/* In the context of ARMv8-M Security Extensions, this function is used
>>>> for both
>>>> +   'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to
>>>> compute what
>>>> +   registers are used when returning or passing arguments, which is
>>>> then
>>>> +   returned as a mask.  It will also compute a mask to indicate
>>>> padding/unused
>>>> +   bits for each of these registers, and passes this through the
>>>> +   PADDING_BITS_TO_CLEAR pointer.  The tree of the argument type is
>>>> passed in
>>>> +   ARG_TYPE, the rtl representation of the argument is passed in
>>>> ARG_RTX and
>>>> +   the starting register used to pass this argument or return value is
>>>> passed
>>>> +   in REGNO.  It makes use of 'comp_not_to_clear_mask_str_un' to
>>>> compute these
>>>> +   for struct and union types.  */
>>>> +
>>>> +static unsigned HOST_WIDE_INT
>>>> +compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
>>>> +                 uint32_t * padding_bits_to_clear)
>>>> +
>>>> +{
>>>> +  int last_used_bit = 0;
>>>> +  unsigned HOST_WIDE_INT not_to_clear_mask;
>>>> +
>>>> +  if (RECORD_OR_UNION_TYPE_P (arg_type))
>>>> +    {
>>>> +      not_to_clear_mask
>>>> +    = comp_not_to_clear_mask_str_un (arg_type, &regno,
>>>> +                     padding_bits_to_clear, 0,
>>>> +                     &last_used_bit);
>>>> +
>>>> +
>>>> +      /* If the 'last_used_bit' is not zero, that means we are still
>>>> using a
>>>> +     part of the last 'regno'.  In such cases we must clear the
>>>> trailing
>>>> +     bits.  Otherwise we are not using regno and we should mark it
>>>> as to
>>>> +     clear.  */
>>>> +      if (last_used_bit != 0)
>>>> +    *(padding_bits_to_clear + regno)
>>>> +      |= UINT32_MAX - ((uint32_t) 1 << last_used_bit) + 1;
>>>>
>>>> padding_bits_to_clear[regno] |= ...
>>>>
>>>> +      else
>>>> +    not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
>>>> +    }
>>>> +  else
>>>> +    {
>>>> +      not_to_clear_mask = 0;
>>>> +      /* We are not dealing with structs nor unions.  So these
>>>> arguments may be
>>>> +     passed in floating point registers too.  In some cases a
>>>> BLKmode is
>>>> +     used when returning or passing arguments in multiple VFP
>>>> registers.  */
>>>> +      if (GET_MODE (arg_rtx) == BLKmode)
>>>> +    {
>>>> +      int i, arg_regs;
>>>> +      rtx reg;
>>>> +
>>>> +      /* This should really only occur when dealing with the hard-float
>>>> +         ABI.  */
>>>> +      gcc_assert (TARGET_HARD_FLOAT_ABI);
>>>> +
>>>> +      for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
>>>> +        {
>>>> +          reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
>>>> +          gcc_assert (REG_P (reg));
>>>> +
>>>> +          not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
>>>> +
>>>> +          /* If we are dealing with DF mode, make sure we don't
>>>> +         clear either of the registers it addresses.  */
>>>> +          arg_regs = ARM_NUM_REGS (GET_MODE (reg));
>>>>
>>>> Better assert here that you're indeed dealing with DFmode and/or you
>>>> have 2 registers.
>>>>
>>> The current code actually works for larger modes too. I don't think code
>>> will ever be generated with larger types, but why assert if it works
>>> anyway?
>> ok, no need to assert here then.
>> I suppose it doesn't add much if the code handles the other modes fine.
>>
>> Kyrill
>>
> Hi,
>
> Reworked comments. No change to ChangeLogs.
>
> Cheers,
> Andre


+/* Clear caller saved registers not used to pass return values and leaked
+   condition flags before exiting a cmse_nonsecure_entry function.  */
+
+void
+cmse_nonsecure_entry_clear_before_return (void)
+{
+  uint64_t to_clear_mask[2];
+  uint32_t padding_bits_to_clear = 0;
+  uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
+  int regno, maxregno = IP_REGNUM;
+  tree result_type;
+  rtx result_rtl;
+
+  to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
+  to_clear_mask[0] |= (1ULL << IP_REGNUM);
+  /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
+     registers.  We also check TARGET_HARD_FLOAT to make sure these are
+     present.  */
+  if (TARGET_HARD_FLOAT)
+    {
+      uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1;
+      maxregno = LAST_VFP_REGNUM;
+
+      float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
+      to_clear_mask[0] |= float_mask;
+
+      float_mask = (1ULL << (maxregno - 63)) - 1;
+      to_clear_mask[1] = float_mask;
+
+      /* Make sure we don't clear the two scratch registers used to clear the
+	 relevant FPSCR bits in output_return_instruction.  We have only
+	 implemented the clearing of FP registers for Thumb-2, so we assert
+	 here that VFP was not enabled for Thumb-1 ARMv8-M targets.  */
+      gcc_assert (arm_arch_thumb2);

I see this assert triggering when running the testsuite cmse.exp with /-march=armv8-m.base/-mfloat-abi=softfp/-mfpu=fpv5-d16
from a toolchain configured with "--with-cpu=cortex-a15 --with-float=hard --with-mode=thumb --with-fpu=neon-vfpv4".
I think some more validation needs to happen to reject the attributes for invalid configurations.

Kyrill
diff mbox

Patch

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index fdbdd423236e7388802bc4bd568f260d95485bbe..9fa216c3a5a3a466448b45a3038d87aecd883433 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -17499,6 +17499,279 @@  note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
   return;
 }
 
+/* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
+   and unions in the context of ARMv8-M Security Extensions.  It is used as a
+   helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
+   functions.  The PADDING_BITS_TO_CLEAR pointer can be the base to either one
+   or four masks, depending on whether it is being computed for a
+   'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
+   respectively.  The tree for the type of the argument or a field within an
+   argument is passed in ARG_TYPE, the current register this argument or field
+   starts in is kept in the pointer REGNO and updated accordingly, the bit this
+   argument or field starts at is passed in STARTING_BIT and the last used bit
+   is kept in LAST_USED_BIT which is also updated accordingly.  */
+
+static unsigned HOST_WIDE_INT
+comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
+			       uint32_t * padding_bits_to_clear,
+			       unsigned starting_bit, int * last_used_bit)
+
+{
+  unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
+
+  if (TREE_CODE (arg_type) == RECORD_TYPE)
+    {
+      unsigned current_bit = starting_bit;
+      tree field;
+      long int offset, size;
+
+
+      field = TYPE_FIELDS (arg_type);
+      while (field)
+	{
+	  /* The offset within a structure is always an offset from
+	     the start of that structure.  Make sure we take that into the
+	     calculation of the register based offset that we use here.  */
+	  offset = starting_bit;
+	  offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
+	  offset %= 32;
+
+	  /* This is the actual size of the field, for bitfields this is the
+	     bitfield width and not the container size.  */
+	  size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
+
+	  if (*last_used_bit != offset)
+	    {
+	      if (offset < *last_used_bit)
+		{
+		  /* This field's offset is before the 'last_used_bit', that
+		     means this field goes on the next register.  So we need to
+		     pad the rest of the current register and increase the
+		     register number.  */
+		  uint32_t mask;
+		  mask = UINT32_MAX - ((uint32_t) 1 << *last_used_bit);
+		  mask++;
+
+		  padding_bits_to_clear[*regno] |= mask;
+		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
+		  (*regno)++;
+		}
+	      else
+		{
+		  /* Otherwise we pad the bits between the last field's end and
+		     the start of the new field.  */
+		  uint32_t mask;
+
+		  mask = UINT32_MAX >> (32 - offset);
+		  mask -= ((uint32_t) 1 << *last_used_bit) - 1;
+		  padding_bits_to_clear[*regno] |= mask;
+		}
+	      current_bit = offset;
+	    }
+
+	  /* Calculate further padding bits for inner structs/unions too.  */
+	  if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
+	    {
+	      *last_used_bit = current_bit;
+	      not_to_clear_reg_mask
+		|= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
+						  padding_bits_to_clear, offset,
+						  last_used_bit);
+	    }
+	  else
+	    {
+	      /* Update 'current_bit' with this field's size.  If the
+		 'current_bit' lies in a subsequent register, update 'regno' and
+		 reset 'current_bit' to point to the current bit in that new
+		 register.  */
+	      current_bit += size;
+	      while (current_bit >= 32)
+		{
+		  current_bit -= 32;
+		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
+		  (*regno)++;
+		}
+	      *last_used_bit = current_bit;
+	    }
+
+	  field = TREE_CHAIN (field);
+	}
+      not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
+    }
+  else if (TREE_CODE (arg_type) == UNION_TYPE)
+    {
+      tree field, field_t;
+      int i, regno_t, field_size;
+      int max_reg = -1;
+      int max_bit = -1;
+      uint32_t mask;
+      uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
+	= {UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX};
+
+      /* To compute the padding bits in a union we only consider bits as
+	 padding bits if they are always either a padding bit or fall outside a
+	 field's size for all fields in the union.  */
+      field = TYPE_FIELDS (arg_type);
+      while (field)
+	{
+	  uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
+	    = {0U, 0U, 0U, 0U};
+	  int last_used_bit_t = *last_used_bit;
+	  regno_t = *regno;
+	  field_t = TREE_TYPE (field);
+
+	  /* If the field's type is either a record or a union make sure to
+	     compute their padding bits too.  */
+	  if (RECORD_OR_UNION_TYPE_P (field_t))
+	    not_to_clear_reg_mask
+	      |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
+						&padding_bits_to_clear_t[0],
+						starting_bit, &last_used_bit_t);
+	  else
+	    {
+	      field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
+	      regno_t = (field_size / 32) + *regno;
+	      last_used_bit_t = (starting_bit + field_size) % 32;
+	    }
+
+	  for (i = *regno; i < regno_t; i++)
+	    {
+	      /* For all but the last register used by this field only keep the
+		 padding bits that were padding bits in this field.  */
+	      padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
+	    }
+
+	  /* For the last register, keep all padding bits that were padding
+	     bits in this field and any padding bits that are still valid
+	     as padding bits but fall outside of this field's size.  */
+	  mask = (UINT32_MAX - ((uint32_t) 1 << last_used_bit_t)) + 1;
+	  padding_bits_to_clear_res[regno_t]
+	    &= padding_bits_to_clear_t[regno_t] | mask;
+
+	  /* Update the maximum size of the fields in terms of registers used
+	     ('max_reg') and the 'last_used_bit' in said register.  */
+	  if (max_reg < regno_t)
+	    {
+	      max_reg = regno_t;
+	      max_bit = last_used_bit_t;
+	    }
+	  else if (max_reg == regno_t && max_bit < last_used_bit_t)
+	    max_bit = last_used_bit_t;
+
+	  field = TREE_CHAIN (field);
+	}
+
+      /* Update the current padding_bits_to_clear using the intersection of the
+	 padding bits of all the fields.  */
+      for (i = *regno; i < max_reg; i++)
+	padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
+
+      /* Do not keep trailing padding bits, we do not know yet whether this
+	 is the end of the argument.  */
+      mask = ((uint32_t) 1 << max_bit) - 1;
+      padding_bits_to_clear[max_reg]
+	|= padding_bits_to_clear_res[max_reg] & mask;
+
+      *regno = max_reg;
+      *last_used_bit = max_bit;
+    }
+  else
+    /* This function should only be used for structs and unions.  */
+    gcc_unreachable ();
+
+  return not_to_clear_reg_mask;
+}
+
+/* In the context of ARMv8-M Security Extensions, this function is used for both
+   'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
+   registers are used when returning or passing arguments, which is then
+   returned as a mask.  It will also compute a mask to indicate padding/unused
+   bits for each of these registers, and passes this through the
+   PADDING_BITS_TO_CLEAR pointer.  The tree of the argument type is passed in
+   ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
+   the starting register used to pass this argument or return value is passed
+   in REGNO.  It makes use of 'comp_not_to_clear_mask_str_un' to compute these
+   for struct and union types.  */
+
+static unsigned HOST_WIDE_INT
+compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
+			     uint32_t * padding_bits_to_clear)
+
+{
+  int last_used_bit = 0;
+  unsigned HOST_WIDE_INT not_to_clear_mask;
+
+  if (RECORD_OR_UNION_TYPE_P (arg_type))
+    {
+      not_to_clear_mask
+	= comp_not_to_clear_mask_str_un (arg_type, &regno,
+					 padding_bits_to_clear, 0,
+					 &last_used_bit);
+
+
+      /* If the 'last_used_bit' is not zero, that means we are still using a
+	 part of the last 'regno'.  In such cases we must clear the trailing
+	 bits.  Otherwise we are not using regno and we should mark it as to
+	 clear.  */
+      if (last_used_bit != 0)
+	padding_bits_to_clear[regno]
+	  |= UINT32_MAX - ((uint32_t) 1 << last_used_bit) + 1;
+      else
+	not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
+    }
+  else
+    {
+      not_to_clear_mask = 0;
+      /* We are not dealing with structs nor unions.  So these arguments may be
+	 passed in floating point registers too.  In some cases a BLKmode is
+	 used when returning or passing arguments in multiple VFP registers.  */
+      if (GET_MODE (arg_rtx) == BLKmode)
+	{
+	  int i, arg_regs;
+	  rtx reg;
+
+	  /* This should really only occur when dealing with the hard-float
+	     ABI.  */
+	  gcc_assert (TARGET_HARD_FLOAT_ABI);
+
+	  for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
+	    {
+	      reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
+	      gcc_assert (REG_P (reg));
+
+	      not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
+
+	      /* If this value spans several registers (e.g. it is in DF
+		 mode), make sure we do not clear any of them.  */
+	      arg_regs = ARM_NUM_REGS (GET_MODE (reg));
+	      if (arg_regs > 1)
+		{
+		  unsigned HOST_WIDE_INT mask;
+		  mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
+		  mask -= HOST_WIDE_INT_1U << REGNO (reg);
+		  not_to_clear_mask |= mask;
+		}
+	    }
+	}
+      else
+	{
+	  /* Otherwise we can rely on the MODE to determine how many registers
+	     are being used by this argument.  */
+	  int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
+	  not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
+	  if (arg_regs > 1)
+	    {
+	      unsigned HOST_WIDE_INT mask;
+	      mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
+	      mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
+	      not_to_clear_mask |= mask;
+	    }
+	}
+    }
+
+  return not_to_clear_mask;
+}
+
 /* Rewrite move insn into subtract of 0 if the condition codes will
    be useful in next conditional jump insn.  */
 
@@ -19920,7 +20193,42 @@  output_return_instruction (rtx operand, bool really_return, bool reverse,
 
 	default:
 	  if (IS_CMSE_ENTRY (func_type))
-	    snprintf (instr, sizeof (instr), "bxns%s\t%%|lr", conditional);
+	    {
+	      /* Check if we have to clear the 'GE bits' which is only used if
+		 parallel add and subtraction instructions are available.  */
+	      if (TARGET_INT_SIMD)
+		snprintf (instr, sizeof (instr),
+			  "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
+	      else
+		snprintf (instr, sizeof (instr),
+			  "msr%s\tAPSR_nzcvq, %%|lr", conditional);
+
+	      output_asm_insn (instr, & operand);
+	      if (TARGET_HARD_FLOAT)
+		{
+		  /* Clear the cumulative exception-status bits (0-4,7) and the
+		     condition code bits (28-31) of the FPSCR.  We need to
+		     remember to clear the first scratch register used (IP) and
+		     save and restore the second (r4).  */
+		  snprintf (instr, sizeof (instr), "push\t{%%|r4}");
+		  output_asm_insn (instr, & operand);
+		  snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
+		  output_asm_insn (instr, & operand);
+		  snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
+		  output_asm_insn (instr, & operand);
+		  snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
+		  output_asm_insn (instr, & operand);
+		  snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
+		  output_asm_insn (instr, & operand);
+		  snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
+		  output_asm_insn (instr, & operand);
+		  snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
+		  output_asm_insn (instr, & operand);
+		  snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
+		  output_asm_insn (instr, & operand);
+		}
+	      snprintf (instr, sizeof (instr), "bxns\t%%|lr");
+	    }
 	  /* Use bx if it's available.  */
 	  else if (arm_arch5 || arm_arch4t)
 	    sprintf (instr, "bx%s\t%%|lr", conditional);
@@ -24187,7 +24495,11 @@  thumb_exit (FILE *f, int reg_containing_return_addr)
 	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
 
       if (IS_CMSE_ENTRY (arm_current_func_type ()))
-	asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
+	{
+	  asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
+		       reg_containing_return_addr);
+	  asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
+	}
       else
 	asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
       return;
@@ -24423,7 +24735,10 @@  thumb_exit (FILE *f, int reg_containing_return_addr)
 
   /* Return to caller.  */
   if (IS_CMSE_ENTRY (arm_current_func_type ()))
-    asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
+    {
+      asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
+      asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
+    }
   else
     asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
 }
@@ -25285,6 +25600,134 @@  thumb1_expand_prologue (void)
     cfun->machine->lr_save_eliminated = 0;
 }
 
+/* Clear caller saved registers not used to pass return values and leaked
+   condition flags before exiting a cmse_nonsecure_entry function.  */
+
+void
+cmse_nonsecure_entry_clear_before_return (void)
+{
+  uint64_t to_clear_mask[2];
+  uint32_t padding_bits_to_clear = 0;
+  uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
+  int regno, maxregno = IP_REGNUM;
+  tree result_type;
+  rtx result_rtl;
+
+  to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
+  to_clear_mask[0] |= (1ULL << IP_REGNUM);
+  /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
+     registers.  We also check TARGET_HARD_FLOAT to make sure these are
+     present.  */
+  if (TARGET_HARD_FLOAT)
+    {
+      uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1;
+      maxregno = LAST_VFP_REGNUM;
+
+      float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
+      to_clear_mask[0] |= float_mask;
+
+      float_mask = (1ULL << (maxregno - 63)) - 1;
+      to_clear_mask[1] = float_mask;
+
+      /* Make sure we don't clear the two scratch registers used to clear the
+	 relevant FPSCR bits in output_return_instruction.  We have only
+	 implemented the clearing of FP registers for Thumb-2, so we assert
+	 here that VFP was not enabled for Thumb-1 ARMv8-M targets.  */
+      gcc_assert (arm_arch_thumb2);
+      emit_use (gen_rtx_REG (SImode, IP_REGNUM));
+      to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
+      emit_use (gen_rtx_REG (SImode, 4));
+      to_clear_mask[0] &= ~(1ULL << 4);
+    }
+
+  /* If the user has defined registers to be caller saved, these are no longer
+     restored by the function before returning and must thus be cleared for
+     security purposes.  */
+  for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++)
+    {
+      /* We do not touch registers that can be used to pass arguments as per
+	 the AAPCS, since these should never be made callee-saved by user
+	 options.  */
+      if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
+	continue;
+      if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
+	continue;
+      if (call_used_regs[regno])
+	to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
+    }
+
+  /* Make sure we do not clear the registers used to return the result in.  */
+  result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
+  if (!VOID_TYPE_P (result_type))
+    {
+      result_rtl = arm_function_value (result_type, current_function_decl, 0);
+
+      /* No need to check that we return in registers, because we don't
+	 support returning on stack yet.  */
+      to_clear_mask[0]
+	&= ~compute_not_to_clear_mask (result_type, result_rtl, 0,
+				       padding_bits_to_clear_ptr);
+    }
+
+  if (padding_bits_to_clear != 0)
+    {
+      rtx reg_rtx;
+      /* Padding bits to clear is not 0 so we know we are dealing with
+	 returning a composite type, which only uses r0.  Let's make sure that
+	 r1-r3 is cleared too, we will use r1 as a scratch register.  */
+      gcc_assert ((to_clear_mask[0] & 0xe) == 0xe);
+
+      reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
+
+      /* Fill the lower half of the negated padding_bits_to_clear.  */
+      emit_move_insn (reg_rtx,
+		      GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
+
+      /* Also fill the top half of the negated padding_bits_to_clear.  */
+      if (((~padding_bits_to_clear) >> 16) > 0)
+	emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
+						      GEN_INT (16),
+						      GEN_INT (16)),
+				GEN_INT ((~padding_bits_to_clear) >> 16)));
+
+      emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
+			   gen_rtx_REG (SImode, R0_REGNUM),
+			   reg_rtx));
+    }
+
+  for (regno = 0; regno <= maxregno; regno++)
+    {
+      if (!(to_clear_mask[regno / 64] & (1ULL << (regno % 64))))
+	continue;
+
+      if (IS_VFP_REGNUM (regno))
+	{
+	  /* If regno is an even vfp register and its successor is also to
+	     be cleared, use vmov.  */
+	  if (TARGET_VFP_DOUBLE
+	      && VFP_REGNO_OK_FOR_DOUBLE (regno)
+	      && to_clear_mask[regno / 64] & (1ULL << ((regno % 64) + 1)))
+	    {
+	      emit_move_insn (gen_rtx_REG (DFmode, regno++),
+			      CONST1_RTX (DFmode));
+	      emit_use (gen_rtx_REG (DFmode, regno));
+	    }
+	  else
+	    {
+	      emit_move_insn (gen_rtx_REG (SFmode, regno),
+			      CONST1_RTX (SFmode));
+	      emit_use (gen_rtx_REG (SFmode, regno));
+	    }
+	}
+      else
+	{
+	  emit_move_insn (gen_rtx_REG (SImode, regno),
+			  gen_rtx_REG (SImode, LR_REGNUM));
+	  emit_use (gen_rtx_REG (SImode, regno));
+	}
+    }
+}
+
 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
    POP instruction can be generated.  LR should be replaced by PC.  All
    the checks required are already done by  USE_RETURN_INSN ().  Hence,
@@ -25334,6 +25777,8 @@  thumb2_expand_return (bool simple_return)
     }
   else
     {
+      if (IS_CMSE_ENTRY (arm_current_func_type ()))
+	cmse_nonsecure_entry_clear_before_return ();
       emit_jump_insn (simple_return_rtx);
     }
 }
@@ -25392,6 +25837,10 @@  thumb1_expand_epilogue (void)
 
   if (! df_regs_ever_live_p (LR_REGNUM))
     emit_use (gen_rtx_REG (SImode, LR_REGNUM));
+
+  /* Clear all caller-saved regs that are not used to return.  */
+  if (IS_CMSE_ENTRY (arm_current_func_type ()))
+    cmse_nonsecure_entry_clear_before_return ();
 }
 
 /* Epilogue code for APCS frame.  */
@@ -25826,6 +26275,14 @@  arm_expand_epilogue (bool really_return)
 				   stack_pointer_rtx, stack_pointer_rtx);
     }
 
+    /* Clear all caller-saved regs that are not used to return.  */
+    if (IS_CMSE_ENTRY (arm_current_func_type ()))
+      {
+	/* CMSE_ENTRY always returns.  */
+	gcc_assert (really_return);
+	cmse_nonsecure_entry_clear_before_return ();
+      }
+
   if (!really_return)
     return;
 
diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
index cd98de7dcb40de483a9f93c0674bd216f4b0c56a..433fc79ae5810a4d3eb45d1ba80872a39e157e14 100644
--- a/gcc/config/arm/thumb1.md
+++ b/gcc/config/arm/thumb1.md
@@ -1843,8 +1843,13 @@ 
   "*
     return thumb1_unexpanded_epilogue ();
   "
-  ; Length is absolute worst case
-  [(set_attr "length" "44")
+  ; Length is absolute worst case, when using CMSE and if this is an entry
+  ; function an extra 4 (MSR) bytes will be added.
+  [(set (attr "length")
+	(if_then_else
+	 (match_test "IS_CMSE_ENTRY (arm_current_func_type ())")
+	 (const_int 48)
+	 (const_int 44)))
    (set_attr "type" "block")
    ;; We don't clobber the conditions, but the potential length of this
    ;; operation is sufficient to make conditionalizing the sequence
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index affcd832b72b7d358347e7370265be492866bb90..f5033ef802c085bd0ce479bc7026db96d9e25632 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -1114,12 +1114,31 @@ 
 
 (define_insn "*thumb2_return"
   [(simple_return)]
-  "TARGET_THUMB2"
+  "TARGET_THUMB2 && !IS_CMSE_ENTRY (arm_current_func_type ())"
   "* return output_return_instruction (const_true_rtx, true, false, true);"
   [(set_attr "type" "branch")
    (set_attr "length" "4")]
 )
 
+(define_insn "*thumb2_cmse_entry_return"
+  [(simple_return)]
+  "TARGET_THUMB2 && IS_CMSE_ENTRY (arm_current_func_type ())"
+  "* return output_return_instruction (const_true_rtx, true, false, true);"
+  [(set_attr "type" "branch")
+   ; This is a return from a cmse_nonsecure_entry function so code will be
+   ; added to clear the APSR and potentially the FPSCR if VFP is available, so
+   ; we adapt the length accordingly.
+   (set (attr "length")
+     (if_then_else (match_test "TARGET_HARD_FLOAT")
+      (const_int 12)
+      (const_int 8)))
+   ; We do not support predicated execution of returns from
+   ; cmse_nonsecure_entry functions because we need to clear the APSR.  Since
+   ; predicable has to be a constant, we had to duplicate the thumb2_return
+   ; pattern for CMSE entry functions.
+   (set_attr "predicable" "no")]
+)
+
 (define_insn_and_split "thumb2_eh_return"
   [(unspec_volatile [(match_operand:SI 0 "s_register_operand" "r")]
 		    VUNSPEC_EH_RETURN)
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-2.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..4c6354c7a8cfead895e6c62d61c3f2a2bca1a1f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-2.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_base_ok } */
+/* { dg-add-options arm_arch_v8m_base } */
+/* { dg-options "-mcmse" }  */
+
+extern float bar (void);
+
+float __attribute__ ((cmse_nonsecure_entry))
+foo (void)
+{
+  return bar ();
+}
+/* { dg-final { scan-assembler "mov\tr1, lr" } } */
+/* { dg-final { scan-assembler "mov\tr2, lr" } } */
+/* { dg-final { scan-assembler "mov\tr3, lr" } } */
+/* { dg-final { scan-assembler "mov\tip, lr" } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, r1" } } */
+/* { dg-final { scan-assembler "bxns" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/bitfield-1.c b/gcc/testsuite/gcc.target/arm/cmse/bitfield-1.c
new file mode 100644
index 0000000000000000000000000000000000000000..fccc51d5c82f7955ee4cb8256c1dd38f9ff2670d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/bitfield-1.c
@@ -0,0 +1,39 @@ 
+/* { dg-do run } */
+/* { dg-options "--save-temps -mcmse -Wl,--section-start,.gnu.sgstubs=0x20400000" } */
+
+typedef struct
+{
+  unsigned short  a : 6;
+  unsigned char	  b : 3;
+  unsigned char	  c;
+  unsigned short  d : 8;
+} test_st;
+
+test_st __attribute__ ((cmse_nonsecure_entry)) foo (void)
+{
+  test_st t;
+  t.a = 63u;
+  t.b = 7u;
+  t.c = 255u;
+  t.d = 255u;
+  return t;
+}
+
+int
+main (void)
+{
+  test_st t;
+  t = foo ();
+  if (t.a != 63u
+      || t.b != 7u
+      || t.c != 255u
+      || t.d != 255u)
+    __builtin_abort ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler "movw\tr1, #1855" } } */
+/* { dg-final { scan-assembler "movt\tr1, 65535" } } */
+/* { dg-final { scan-assembler "ands\tr0(, r0)?, r1" } } */
+/* { dg-final { scan-assembler "bxns" } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/cmse/bitfield-2.c b/gcc/testsuite/gcc.target/arm/cmse/bitfield-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..e6aee3c4c022d50baec8ab16443130897540e703
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/bitfield-2.c
@@ -0,0 +1,36 @@ 
+/* { dg-do run } */
+/* { dg-options "--save-temps -mcmse -Wl,--section-start,.gnu.sgstubs=0x20400000" } */
+
+typedef struct
+{
+  short	      a : 7;
+  signed char b : 3;
+  short	      c : 11;
+} test_st;
+
+test_st __attribute__ ((cmse_nonsecure_entry)) foo (void)
+{
+  test_st t;
+  t.a = -64;
+  t.b = -4 ;
+  t.c = -1024;
+  return t;
+}
+
+int
+main (void)
+{
+  test_st t;
+  t = foo ();
+  if (t.a != -64
+      || t.b != -4
+      || t.c != -1024)
+    __builtin_abort ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler "movw\tr1, #1919" } } */
+/* { dg-final { scan-assembler "movt\tr1, 2047" } } */
+/* { dg-final { scan-assembler "ands\tr0(, r0)?, r1" } } */
+/* { dg-final { scan-assembler "bxns" } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/cmse/bitfield-3.c b/gcc/testsuite/gcc.target/arm/cmse/bitfield-3.c
new file mode 100644
index 0000000000000000000000000000000000000000..285a2b92f64c1913ef585b8daa4d27c6da0a3d2f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/bitfield-3.c
@@ -0,0 +1,37 @@ 
+/* { dg-do run } */
+/* { dg-options "--save-temps -mcmse -Wl,--section-start,.gnu.sgstubs=0x20400000" } */
+
+typedef struct
+{
+  short	      a;
+  signed char b : 2;
+  short		: 1;
+  signed char c : 3;
+} test_st;
+
+test_st __attribute__ ((cmse_nonsecure_entry)) foo (void)
+{
+  test_st t;
+  t.a = -32768;
+  t.b = -2;
+  t.c = -4;
+  return t;
+}
+
+int
+main (void)
+{
+  test_st t;
+  t = foo ();
+  if (t.a != -32768
+      || t.b != -2
+      || t.c != -4)
+    __builtin_abort ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler "movw\tr1, #65535" } } */
+/* { dg-final { scan-assembler "movt\tr1, 63" } } */
+/* { dg-final { scan-assembler "ands\tr0(, r0)?, r1" } } */
+/* { dg-final { scan-assembler "bxns" } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse.exp b/gcc/testsuite/gcc.target/arm/cmse/cmse.exp
index f797dba1901720e04249d61078c1cbf2a3e436a9..38f18414c2fefec56161e6ac3f7291b03a3b29a3 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/cmse.exp
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse.exp
@@ -43,6 +43,26 @@  set LTO_TORTURE_OPTIONS ""
 gcc-dg-runtest [lsort [glob $srcdir/$subdir/*.c]] \
 	"" $DEFAULT_CFLAGS
 
+if {[check_effective_target_arm_arch_v8m_base_ok]} then {
+    # Baseline only
+    gcc-dg-runtest [lsort [glob $srcdir/$subdir/baseline/*.c]] \
+	    "" $DEFAULT_CFLAGS
+}
+
+if {[check_effective_target_arm_arch_v8m_main_ok]} then {
+    # Mainline -mfloat-abi=soft
+    gcc-dg-runtest [lsort [glob $srcdir/$subdir/mainline/soft/*.c]] \
+	    "-mfloat-abi=soft" $DEFAULT_CFLAGS
+    gcc-dg-runtest [lsort [glob $srcdir/$subdir/mainline/softfp/*.c]] \
+	    "" $DEFAULT_CFLAGS
+    gcc-dg-runtest [lsort [glob $srcdir/$subdir/mainline/softfp-sp/*.c]] \
+	    "" $DEFAULT_CFLAGS
+    gcc-dg-runtest [lsort [glob $srcdir/$subdir/mainline/hard/*.c]] \
+	    "" $DEFAULT_CFLAGS
+    gcc-dg-runtest [lsort [glob $srcdir/$subdir/mainline/hard-sp/*.c]] \
+	    "" $DEFAULT_CFLAGS
+}
+
 set LTO_TORTURE_OPTIONS ${saved-lto_torture_options}
 set dg-do-what-default ${saved-dg-do-what-default}
 
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-5.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-5.c
new file mode 100644
index 0000000000000000000000000000000000000000..88dec2762812614ee986ee1a023c0acce8f94c91
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-5.c
@@ -0,0 +1,45 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_main_ok } */
+/* { dg-add-options arm_arch_v8m_main } */
/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" "-mfloat-abi=softfp"} {""} } */
+/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */
+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-sp-d16" }  */
+
+extern float bar (void);
+
+float __attribute__ ((cmse_nonsecure_entry))
+foo (void)
+{
+  return bar ();
+}
+/* { dg-final { scan-assembler "mov\tr0, lr" } } */
+/* { dg-final { scan-assembler "mov\tr1, lr" } } */
+/* { dg-final { scan-assembler "mov\tr2, lr" } } */
+/* { dg-final { scan-assembler "mov\tr3, lr" } } */
+/* { dg-final { scan-assembler-not "vmov\.f32\ts0, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts1, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts2, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts3, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts4, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts5, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts6, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts7, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts8, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts9, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts10, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts11, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts12, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts13, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts14, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts15, #1\.0" } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { arm_arch_v8m_main_ok && { ! arm_dsp } } } } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target { arm_arch_v8m_main_ok && arm_dsp } } } } */
+/* { dg-final { scan-assembler "push\t{r4}" } } */
+/* { dg-final { scan-assembler "vmrs\tip, fpscr" } } */
+/* { dg-final { scan-assembler "movw\tr4, #65376" } } */
+/* { dg-final { scan-assembler "movt\tr4, #4095" } } */
+/* { dg-final { scan-assembler "and\tip, r4" } } */
+/* { dg-final { scan-assembler "vmsr\tfpscr, ip" } } */
+/* { dg-final { scan-assembler "pop\t{r4}" } } */
+/* { dg-final { scan-assembler "mov\tip, lr" } } */
+/* { dg-final { scan-assembler "bxns" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-5.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-5.c
new file mode 100644
index 0000000000000000000000000000000000000000..29f60baf5212f9fa2e4436fe40c6abe4ac671254
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-5.c
@@ -0,0 +1,38 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_main_ok } */
+/* { dg-add-options arm_arch_v8m_main } */
/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" "-mfloat-abi=softfp"} {""} } */
+/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */
+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-d16" }  */
+
+extern float bar (void);
+
+float __attribute__ ((cmse_nonsecure_entry))
+foo (void)
+{
+  return bar ();
+}
+/* { dg-final { scan-assembler "mov\tr0, lr" } } */
+/* { dg-final { scan-assembler "mov\tr1, lr" } } */
+/* { dg-final { scan-assembler "mov\tr2, lr" } } */
+/* { dg-final { scan-assembler "mov\tr3, lr" } } */
+/* { dg-final { scan-assembler-not "vmov\.f32\ts0, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts1, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td1, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td2, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td3, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td4, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td5, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td6, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td7, #1\.0" } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { arm_arch_v8m_main_ok && { ! arm_dsp } } } } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target { arm_arch_v8m_main_ok && arm_dsp } } } } */
+/* { dg-final { scan-assembler "push\t{r4}" } } */
+/* { dg-final { scan-assembler "vmrs\tip, fpscr" } } */
+/* { dg-final { scan-assembler "movw\tr4, #65376" } } */
+/* { dg-final { scan-assembler "movt\tr4, #4095" } } */
+/* { dg-final { scan-assembler "and\tip, r4" } } */
+/* { dg-final { scan-assembler "vmsr\tfpscr, ip" } } */
+/* { dg-final { scan-assembler "pop\t{r4}" } } */
+/* { dg-final { scan-assembler "mov\tip, lr" } } */
+/* { dg-final { scan-assembler "bxns" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-5.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-5.c
new file mode 100644
index 0000000000000000000000000000000000000000..a7229ea8eb2da1da264f58f8518daf303d1bdeda
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-5.c
@@ -0,0 +1,24 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_main_ok } */
+/* { dg-add-options arm_arch_v8m_main } */
/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=hard" "-mfloat-abi=softfp"} {""} } */
+/* { dg-options "-mcmse -mfloat-abi=soft" }  */
+
+extern float bar (void);
+
+float __attribute__ ((cmse_nonsecure_entry))
+foo (void)
+{
+  return bar ();
+}
+
+/* { dg-final { scan-assembler "mov\tr1, lr" } } */
+/* { dg-final { scan-assembler "mov\tr2, lr" } } */
+/* { dg-final { scan-assembler "mov\tr3, lr" } } */
+/* { dg-final { scan-assembler "mov\tip, lr" } } */
+/* { dg-final { scan-assembler-not "vmov" } } */
+/* { dg-final { scan-assembler-not "vmsr" } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { arm_arch_v8m_main_ok && { ! arm_dsp } } } } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target { arm_arch_v8m_main_ok && arm_dsp } } } } */
+/* { dg-final { scan-assembler "bxns" } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-5.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-5.c
new file mode 100644
index 0000000000000000000000000000000000000000..7734d77dc3812e6a158ea1f6b067930846fffccb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-5.c
@@ -0,0 +1,46 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_main_ok } */
+/* { dg-add-options arm_arch_v8m_main } */
/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" "-mfloat-abi=hard"} {""} } */
+/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */
+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-sp-d16" }  */
+
+extern float bar (void);
+
+float __attribute__ ((cmse_nonsecure_entry))
+foo (void)
+{
+  return bar ();
+}
+/* { dg-final { scan-assembler "__acle_se_foo:" } } */
+/* { dg-final { scan-assembler-not "mov\tr0, lr" } } */
+/* { dg-final { scan-assembler "mov\tr1, lr" } } */
+/* { dg-final { scan-assembler "mov\tr2, lr" } } */
+/* { dg-final { scan-assembler "mov\tr3, lr" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts0, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts1, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts2, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts3, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts4, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts5, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts6, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts7, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts8, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts9, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts10, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts11, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts12, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts13, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts14, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts15, #1\.0" } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { arm_arch_v8m_main_ok && { ! arm_dsp } } } } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target { arm_arch_v8m_main_ok && arm_dsp } } } } */
+/* { dg-final { scan-assembler "push\t{r4}" } } */
+/* { dg-final { scan-assembler "vmrs\tip, fpscr" } } */
+/* { dg-final { scan-assembler "movw\tr4, #65376" } } */
+/* { dg-final { scan-assembler "movt\tr4, #4095" } } */
+/* { dg-final { scan-assembler "and\tip, r4" } } */
+/* { dg-final { scan-assembler "vmsr\tfpscr, ip" } } */
+/* { dg-final { scan-assembler "pop\t{r4}" } } */
+/* { dg-final { scan-assembler "mov\tip, lr" } } */
+/* { dg-final { scan-assembler "bxns" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-5.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-5.c
new file mode 100644
index 0000000000000000000000000000000000000000..6addaa1a4eda8e2930d5fe72c94697bcf6e604e4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-5.c
@@ -0,0 +1,38 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_main_ok } */
+/* { dg-add-options arm_arch_v8m_main } */
/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" "-mfloat-abi=hard"} {""} } */
+/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */
+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-d16" }  */
+
+extern float bar (void);
+
+float __attribute__ ((cmse_nonsecure_entry))
+foo (void)
+{
+  return bar ();
+}
+/* { dg-final { scan-assembler "__acle_se_foo:" } } */
+/* { dg-final { scan-assembler-not "mov\tr0, lr" } } */
+/* { dg-final { scan-assembler "mov\tr1, lr" } } */
+/* { dg-final { scan-assembler "mov\tr2, lr" } } */
+/* { dg-final { scan-assembler "mov\tr3, lr" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td0, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td1, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td2, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td3, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td4, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td5, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td6, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td7, #1\.0" } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { arm_arch_v8m_main_ok && { ! arm_dsp } } } } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target { arm_arch_v8m_main_ok && arm_dsp } } } } */
+/* { dg-final { scan-assembler "push\t{r4}" } } */
+/* { dg-final { scan-assembler "vmrs\tip, fpscr" } } */
+/* { dg-final { scan-assembler "movw\tr4, #65376" } } */
+/* { dg-final { scan-assembler "movt\tr4, #4095" } } */
+/* { dg-final { scan-assembler "and\tip, r4" } } */
+/* { dg-final { scan-assembler "vmsr\tfpscr, ip" } } */
+/* { dg-final { scan-assembler "pop\t{r4}" } } */
+/* { dg-final { scan-assembler "mov\tip, lr" } } */
+/* { dg-final { scan-assembler "bxns" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/struct-1.c b/gcc/testsuite/gcc.target/arm/cmse/struct-1.c
new file mode 100644
index 0000000000000000000000000000000000000000..2d366a944df692f29ab44e3ee4d33c777b126223
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/struct-1.c
@@ -0,0 +1,33 @@ 
+/* { dg-do run } */
+/* { dg-options "--save-temps -mcmse -Wl,--section-start,.gnu.sgstubs=0x20400000" } */
+
+typedef struct
+{
+  unsigned char	  a;
+  unsigned short  b;
+} test_st;
+
+test_st __attribute__ ((cmse_nonsecure_entry)) foo (void)
+{
+  test_st t;
+  t.a = 255u;
+  t.b = 32767u;
+  return t;
+}
+
+int
+main (void)
+{
+  test_st t;
+  t = foo ();
+  if (t.a != 255u || t.b != 32767u)
+    __builtin_abort ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler "movs\tr1, #255" } } */
+/* { dg-final { scan-assembler "movt\tr1, 65535" } } */
+/* { dg-final { scan-assembler "ands\tr0(, r0)?, r1" } } */
+/* { dg-final { scan-assembler "bxns" } } */
+
+