diff mbox series

[v2,1/2] arm: Zero/Sign extends for CMSE security on Armv8-M.baseline [PR115253]

Message ID 20240607085645.740271-2-torbjorn.svensson@foss.st.com
State New
Headers show
Series arm: Zero/Sign extends for CMSE security on | expand

Commit Message

Torbjorn SVENSSON June 7, 2024, 8:56 a.m. UTC
Properly handle zero and sign extension for Armv8-M.baseline as
Cortex-M23 can have the security extension active.
Currently, there is a internal compiler error on Cortex-M23 for the
epilog processing of sign extension.

This patch addresses the following CVE-2024-0151 for Armv8-M.baseline.

gcc/ChangeLog:

	PR target/115253
	* config/arm/arm.cc (cmse_nonsecure_call_inline_register_clear):
	Sign extend for Thumb1.
	(thumb1_expand_prologue): Add zero/sign extend.

Signed-off-by: Torbjörn SVENSSON <torbjorn.svensson@foss.st.com>
Co-authored-by: Yvan ROUX <yvan.roux@foss.st.com>
---
 gcc/config/arm/arm.cc | 68 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 60 insertions(+), 8 deletions(-)

Comments

Andre Vieira (lists) June 10, 2024, 10:37 a.m. UTC | #1
Hi Torbjorn,

Thanks for this, I have some comments below.

On 07/06/2024 09:56, Torbjörn SVENSSON wrote:
> Properly handle zero and sign extension for Armv8-M.baseline as
> Cortex-M23 can have the security extension active.
> Currently, there is a internal compiler error on Cortex-M23 for the
> epilog processing of sign extension.
> 
> This patch addresses the following CVE-2024-0151 for Armv8-M.baseline.
> 
> gcc/ChangeLog:
> 
> 	PR target/115253
> 	* config/arm/arm.cc (cmse_nonsecure_call_inline_register_clear):
> 	Sign extend for Thumb1.
> 	(thumb1_expand_prologue): Add zero/sign extend.
> 
> Signed-off-by: Torbjörn SVENSSON <torbjorn.svensson@foss.st.com>
> Co-authored-by: Yvan ROUX <yvan.roux@foss.st.com>
> ---
>   gcc/config/arm/arm.cc | 68 ++++++++++++++++++++++++++++++++++++++-----
>   1 file changed, 60 insertions(+), 8 deletions(-)
> 
> diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
> index ea0c963a4d6..d1bb173c135 100644
> --- a/gcc/config/arm/arm.cc
> +++ b/gcc/config/arm/arm.cc
> @@ -19220,17 +19220,23 @@ cmse_nonsecure_call_inline_register_clear (void)
>   	      || TREE_CODE (ret_type) == BOOLEAN_TYPE)
>   	      && known_lt (GET_MODE_SIZE (TYPE_MODE (ret_type)), 4))
>   	    {
> -	      machine_mode ret_mode = TYPE_MODE (ret_type);
> +	      rtx ret_mode = gen_rtx_REG (TYPE_MODE (ret_type), R0_REGNUM);
> +	      rtx si_mode = gen_rtx_REG (SImode, R0_REGNUM);

I'd rename ret_mode and si_mode to ret_reg and si_reg, so its clear they 
are registers and not actually mode types.

>   	      rtx extend;
>   	      if (TYPE_UNSIGNED (ret_type))
> -		extend = gen_rtx_ZERO_EXTEND (SImode,
> -					      gen_rtx_REG (ret_mode, R0_REGNUM));
> +		extend = gen_rtx_SET (si_mode, gen_rtx_ZERO_EXTEND (SImode,
> +								    ret_mode));
> +	      else if (TARGET_THUMB1)
> +		{
> +		  if (known_lt (GET_MODE_SIZE (TYPE_MODE (ret_type)), 2))
> +		    extend = gen_thumb1_extendqisi2 (si_mode, ret_mode);
> +		  else
> +		    extend = gen_thumb1_extendhisi2 (si_mode, ret_mode);
> +		}
>   	      else
> -		extend = gen_rtx_SIGN_EXTEND (SImode,
> -					      gen_rtx_REG (ret_mode, R0_REGNUM));
> -	      emit_insn_after (gen_rtx_SET (gen_rtx_REG (SImode, R0_REGNUM),
> -					     extend), insn);
> -
> +		extend = gen_rtx_SET (si_mode, gen_rtx_SIGN_EXTEND (SImode,
> +								    ret_mode));
> +	      emit_insn_after (extend, insn);
>   	    }

Using gen_rtx_SIGN_EXTEND should work for both, the reason it doesn't is 
because of some weird code in thumb1_extendhisi2, which I'm actually 
gonna look at removing, but I don't think we should block this fix as 
we'd want to backport it ASAP.

But for clearness we should re-order this code so it's clear we only 
need it for that specific case.
Can you maybe do:
if (TYPE_UNSIGNED ..)
{
}
else
{
    /*  Signed-extension is a special case because of 
thumb1_extendhisi2.  */
    if (TARGET_THUMB1
        && known_gt (GET_MODE_SIZE (TYPE_MODE (ret_type)), 2))
      {
         //call the gen_thumb1_extendhisi2
      }
     else
      {
         // use gen_RTX_SIGN_EXTEND
      }
}
>   
>   
> @@ -27250,6 +27256,52 @@ thumb1_expand_prologue (void)
>     live_regs_mask = offsets->saved_regs_mask;
>     lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
>   
> +  /* The AAPCS requires the callee to widen integral types narrower
> +     than 32 bits to the full width of the register; but when handling
> +     calls to non-secure space, we cannot trust the callee to have
> +     correctly done so.  So forcibly re-widen the result here.  */
> +  if (IS_CMSE_ENTRY (func_type))
> +    {
> +      function_args_iterator args_iter;
> +      CUMULATIVE_ARGS args_so_far_v;
> +      cumulative_args_t args_so_far;
> +      bool first_param = true;
> +      tree arg_type;
> +      tree fndecl = current_function_decl;
> +      tree fntype = TREE_TYPE (fndecl);
> +      arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
> +      args_so_far = pack_cumulative_args (&args_so_far_v);
> +      FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
> +	{
> +	  rtx arg_rtx;
> +
> +	  if (VOID_TYPE_P (arg_type))
> +	    break;
> +
> +	  function_arg_info arg (arg_type, /*named=*/true);
> +	  if (!first_param)
> +	    /* We should advance after processing the argument and pass
> +	       the argument we're advancing past.  */
> +	    arm_function_arg_advance (args_so_far, arg);
> +	  first_param = false;
> +	  arg_rtx = arm_function_arg (args_so_far, arg);
> +	  gcc_assert (REG_P (arg_rtx));
> +	  if ((TREE_CODE (arg_type) == INTEGER_TYPE
> +	      || TREE_CODE (arg_type) == ENUMERAL_TYPE
> +	      || TREE_CODE (arg_type) == BOOLEAN_TYPE)
> +	      && known_lt (GET_MODE_SIZE (GET_MODE (arg_rtx)), 4))
> +	    {
> +	      rtx res_reg = gen_rtx_REG (SImode, REGNO (arg_rtx));
> +	      if (TYPE_UNSIGNED (arg_type))
> +		emit_set_insn (res_reg, gen_rtx_ZERO_EXTEND (SImode, arg_rtx));
> +	      else if (known_lt (GET_MODE_SIZE (GET_MODE (arg_rtx)), 2))
> +		emit_insn (gen_thumb1_extendqisi2 (res_reg, arg_rtx));
> +	      else
> +		emit_insn (gen_thumb1_extendhisi2 (res_reg, arg_rtx));
For consistency I'd probably do the same as above here:

if TYPE_UNSIGNED
else
   {
     special-case thumb1_extendhisi2
   }
> +	    }
> +	}
> +    }
> +
>     /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
>     l_mask = live_regs_mask & 0x40ff;
>     /* Then count how many other high registers will need to be pushed.  */

The rest LGTM, but I am not a maintainer You'll need an OK from Richard E.

In the meantime I'll test a patch to simplify thumb1_extendhisi2.
Torbjorn SVENSSON June 10, 2024, 12:19 p.m. UTC | #2
Hi Andre,

Thanks for the review!
Please see my questions below.

On 2024-06-10 12:37, Andre Vieira (lists) wrote:
> Hi Torbjorn,
> 
> Thanks for this, I have some comments below.
> 
> On 07/06/2024 09:56, Torbjörn SVENSSON wrote:
>> Properly handle zero and sign extension for Armv8-M.baseline as
>> Cortex-M23 can have the security extension active.
>> Currently, there is a internal compiler error on Cortex-M23 for the
>> epilog processing of sign extension.
>>
>> This patch addresses the following CVE-2024-0151 for Armv8-M.baseline.
>>
>> gcc/ChangeLog:
>>
>>     PR target/115253
>>     * config/arm/arm.cc (cmse_nonsecure_call_inline_register_clear):
>>     Sign extend for Thumb1.
>>     (thumb1_expand_prologue): Add zero/sign extend.
>>
>> Signed-off-by: Torbjörn SVENSSON <torbjorn.svensson@foss.st.com>
>> Co-authored-by: Yvan ROUX <yvan.roux@foss.st.com>
>> ---
>>   gcc/config/arm/arm.cc | 68 ++++++++++++++++++++++++++++++++++++++-----
>>   1 file changed, 60 insertions(+), 8 deletions(-)
>>
>> diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
>> index ea0c963a4d6..d1bb173c135 100644
>> --- a/gcc/config/arm/arm.cc
>> +++ b/gcc/config/arm/arm.cc
>> @@ -19220,17 +19220,23 @@ cmse_nonsecure_call_inline_register_clear 
>> (void)
>>             || TREE_CODE (ret_type) == BOOLEAN_TYPE)
>>             && known_lt (GET_MODE_SIZE (TYPE_MODE (ret_type)), 4))
>>           {
>> -          machine_mode ret_mode = TYPE_MODE (ret_type);
>> +          rtx ret_mode = gen_rtx_REG (TYPE_MODE (ret_type), R0_REGNUM);
>> +          rtx si_mode = gen_rtx_REG (SImode, R0_REGNUM);
> 
> I'd rename ret_mode and si_mode to ret_reg and si_reg, so its clear they 
> are registers and not actually mode types.

Okay, will be changed before push and/or a V3 of the patches.

>>             rtx extend;
>>             if (TYPE_UNSIGNED (ret_type))
>> -        extend = gen_rtx_ZERO_EXTEND (SImode,
>> -                          gen_rtx_REG (ret_mode, R0_REGNUM));
>> +        extend = gen_rtx_SET (si_mode, gen_rtx_ZERO_EXTEND (SImode,
>> +                                    ret_mode));
>> +          else if (TARGET_THUMB1)
>> +        {
>> +          if (known_lt (GET_MODE_SIZE (TYPE_MODE (ret_type)), 2))
>> +            extend = gen_thumb1_extendqisi2 (si_mode, ret_mode);
>> +          else
>> +            extend = gen_thumb1_extendhisi2 (si_mode, ret_mode);
>> +        }
>>             else
>> -        extend = gen_rtx_SIGN_EXTEND (SImode,
>> -                          gen_rtx_REG (ret_mode, R0_REGNUM));
>> -          emit_insn_after (gen_rtx_SET (gen_rtx_REG (SImode, R0_REGNUM),
>> -                         extend), insn);
>> -
>> +        extend = gen_rtx_SET (si_mode, gen_rtx_SIGN_EXTEND (SImode,
>> +                                    ret_mode));
>> +          emit_insn_after (extend, insn);
>>           }
> 
> Using gen_rtx_SIGN_EXTEND should work for both, the reason it doesn't is 
> because of some weird code in thumb1_extendhisi2, which I'm actually 
> gonna look at removing, but I don't think we should block this fix as 
> we'd want to backport it ASAP.
> 
> But for clearness we should re-order this code so it's clear we only 
> need it for that specific case.
> Can you maybe do:
> if (TYPE_UNSIGNED ..)
> {
> }
> else
> {
>     /*  Signed-extension is a special case because of 
> thumb1_extendhisi2.  */
>     if (TARGET_THUMB1
>         && known_gt (GET_MODE_SIZE (TYPE_MODE (ret_type)), 2))
>       {
>          //call the gen_thumb1_extendhisi2
>       }
>      else
>       {
>          // use gen_RTX_SIGN_EXTEND
>       }
> }

So, you talk about gen_thumb1_extendhisi2, but there is also 
gen_thumb1_extendqisi2. Will it actually be cleaner if the block is 
indented one level?
The comment can be added in the "if (TARGET_THUMB1)" block regardless to 
indicate that gen_rtx_SIGN_EXTEND can't be used.

>> @@ -27250,6 +27256,52 @@ thumb1_expand_prologue (void)
>>     live_regs_mask = offsets->saved_regs_mask;
>>     lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
>> +  /* The AAPCS requires the callee to widen integral types narrower
>> +     than 32 bits to the full width of the register; but when handling
>> +     calls to non-secure space, we cannot trust the callee to have
>> +     correctly done so.  So forcibly re-widen the result here.  */
>> +  if (IS_CMSE_ENTRY (func_type))
>> +    {
>> +      function_args_iterator args_iter;
>> +      CUMULATIVE_ARGS args_so_far_v;
>> +      cumulative_args_t args_so_far;
>> +      bool first_param = true;
>> +      tree arg_type;
>> +      tree fndecl = current_function_decl;
>> +      tree fntype = TREE_TYPE (fndecl);
>> +      arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, 
>> fndecl);
>> +      args_so_far = pack_cumulative_args (&args_so_far_v);
>> +      FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
>> +    {
>> +      rtx arg_rtx;
>> +
>> +      if (VOID_TYPE_P (arg_type))
>> +        break;
>> +
>> +      function_arg_info arg (arg_type, /*named=*/true);
>> +      if (!first_param)
>> +        /* We should advance after processing the argument and pass
>> +           the argument we're advancing past.  */
>> +        arm_function_arg_advance (args_so_far, arg);
>> +      first_param = false;
>> +      arg_rtx = arm_function_arg (args_so_far, arg);
>> +      gcc_assert (REG_P (arg_rtx));
>> +      if ((TREE_CODE (arg_type) == INTEGER_TYPE
>> +          || TREE_CODE (arg_type) == ENUMERAL_TYPE
>> +          || TREE_CODE (arg_type) == BOOLEAN_TYPE)
>> +          && known_lt (GET_MODE_SIZE (GET_MODE (arg_rtx)), 4))
>> +        {
>> +          rtx res_reg = gen_rtx_REG (SImode, REGNO (arg_rtx));
>> +          if (TYPE_UNSIGNED (arg_type))
>> +        emit_set_insn (res_reg, gen_rtx_ZERO_EXTEND (SImode, arg_rtx));
>> +          else if (known_lt (GET_MODE_SIZE (GET_MODE (arg_rtx)), 2))
>> +        emit_insn (gen_thumb1_extendqisi2 (res_reg, arg_rtx));
>> +          else
>> +        emit_insn (gen_thumb1_extendhisi2 (res_reg, arg_rtx));
> For consistency I'd probably do the same as above here:
> 
> if TYPE_UNSIGNED
> else
>    {
>      special-case thumb1_extendhisi2
>    }
>> +        }
>> +    }
>> +    }
>> +
>>     /* Extract a mask of the ones we can give to the Thumb's push 
>> instruction.  */
>>     l_mask = live_regs_mask & 0x40ff;
>>     /* Then count how many other high registers will need to be 
>> pushed.  */
> 
> The rest LGTM, but I am not a maintainer You'll need an OK from Richard E.
> 
> In the meantime I'll test a patch to simplify thumb1_extendhisi2.

Whit the patch you have in mind, will it be possible to call 
gen_rtx_SIGN_EXTEND  for THUMB1 too? Or do we need to keep calling 
thumb1_extendhisi2 and thumb1_extendqisi2?

Kind regards,
Torbjörn
Andre Vieira (lists) June 10, 2024, 12:51 p.m. UTC | #3
Hi,

> So, you talk about gen_thumb1_extendhisi2, but there is also 
> gen_thumb1_extendqisi2. Will it actually be cleaner if the block is 
> indented one level?
> The comment can be added in the "if (TARGET_THUMB1)" block regardless to 
> indicate that gen_rtx_SIGN_EXTEND can't be used.
> 

gen_rtx_SIGN_EXTEND (I see I used wrong caps above before sorry!) will 
work for the case of QImode -> SImode for Thumb1. The reason it doesn't 
work for HImode -> SImode in thumb1 is because thumb1_extendhisi2 uses a 
scratch register, see:
(define_insn "thumb1_extendhisi2"
   [(set (match_operand:SI 0 "register_operand" "=l,l")
         (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "l,m")))
    (clobber (match_scratch:SI 2 "=X,l"))]

  meaning that the pattern generated with gen_rtx_SIGN_EXTEND:
[(set (SImode ...) (sign_extend:SImode (HImode))]
Does not match this and there are no other thumb1 patterns that match 
this either, so the compiler ICEs. For thumb1_extendqisi2 the pattern 
doesn't have the scratch register so a:
  gen_rtx_SET (<rtx:SImode>, gen_rtx_SIGN_EXTEND (SImode, <rtx:QImode>))
will generate a rtl pattern that will match thumb1_extendqisi2.

Hope that makes it clearer.

> 
> Whit the patch you have in mind, will it be possible to call 
> gen_rtx_SIGN_EXTEND  for THUMB1 too? Or do we need to keep calling 
> thumb1_extendhisi2 and thumb1_extendqisi2?

With patch I have in mind, and will post soon, you could use 
gen_rtx_SIGN_EXTEND for HImode in thumb1, like I explained before for 
QImode its already possible.

That series will have another patch that also removes all calls to 
gen_thumb1_extend* and replaces them with gen_rtx_SET + 
gen_rtx_SIGN_EXTEND and renames the "thumb1_extend{hisi2,qisi1} patterns 
to "*thumb1_extend{hisi2,qisi2}".

However, that patch we won't backport, but yours we should hence why 
it's worth having your patch with the thumb1_extendhisi2 workaround.

Thanks,

Andre
diff mbox series

Patch

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index ea0c963a4d6..d1bb173c135 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -19220,17 +19220,23 @@  cmse_nonsecure_call_inline_register_clear (void)
 	      || TREE_CODE (ret_type) == BOOLEAN_TYPE)
 	      && known_lt (GET_MODE_SIZE (TYPE_MODE (ret_type)), 4))
 	    {
-	      machine_mode ret_mode = TYPE_MODE (ret_type);
+	      rtx ret_mode = gen_rtx_REG (TYPE_MODE (ret_type), R0_REGNUM);
+	      rtx si_mode = gen_rtx_REG (SImode, R0_REGNUM);
 	      rtx extend;
 	      if (TYPE_UNSIGNED (ret_type))
-		extend = gen_rtx_ZERO_EXTEND (SImode,
-					      gen_rtx_REG (ret_mode, R0_REGNUM));
+		extend = gen_rtx_SET (si_mode, gen_rtx_ZERO_EXTEND (SImode,
+								    ret_mode));
+	      else if (TARGET_THUMB1)
+		{
+		  if (known_lt (GET_MODE_SIZE (TYPE_MODE (ret_type)), 2))
+		    extend = gen_thumb1_extendqisi2 (si_mode, ret_mode);
+		  else
+		    extend = gen_thumb1_extendhisi2 (si_mode, ret_mode);
+		}
 	      else
-		extend = gen_rtx_SIGN_EXTEND (SImode,
-					      gen_rtx_REG (ret_mode, R0_REGNUM));
-	      emit_insn_after (gen_rtx_SET (gen_rtx_REG (SImode, R0_REGNUM),
-					     extend), insn);
-
+		extend = gen_rtx_SET (si_mode, gen_rtx_SIGN_EXTEND (SImode,
+								    ret_mode));
+	      emit_insn_after (extend, insn);
 	    }
 
 
@@ -27250,6 +27256,52 @@  thumb1_expand_prologue (void)
   live_regs_mask = offsets->saved_regs_mask;
   lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
 
+  /* The AAPCS requires the callee to widen integral types narrower
+     than 32 bits to the full width of the register; but when handling
+     calls to non-secure space, we cannot trust the callee to have
+     correctly done so.  So forcibly re-widen the result here.  */
+  if (IS_CMSE_ENTRY (func_type))
+    {
+      function_args_iterator args_iter;
+      CUMULATIVE_ARGS args_so_far_v;
+      cumulative_args_t args_so_far;
+      bool first_param = true;
+      tree arg_type;
+      tree fndecl = current_function_decl;
+      tree fntype = TREE_TYPE (fndecl);
+      arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
+      args_so_far = pack_cumulative_args (&args_so_far_v);
+      FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
+	{
+	  rtx arg_rtx;
+
+	  if (VOID_TYPE_P (arg_type))
+	    break;
+
+	  function_arg_info arg (arg_type, /*named=*/true);
+	  if (!first_param)
+	    /* We should advance after processing the argument and pass
+	       the argument we're advancing past.  */
+	    arm_function_arg_advance (args_so_far, arg);
+	  first_param = false;
+	  arg_rtx = arm_function_arg (args_so_far, arg);
+	  gcc_assert (REG_P (arg_rtx));
+	  if ((TREE_CODE (arg_type) == INTEGER_TYPE
+	      || TREE_CODE (arg_type) == ENUMERAL_TYPE
+	      || TREE_CODE (arg_type) == BOOLEAN_TYPE)
+	      && known_lt (GET_MODE_SIZE (GET_MODE (arg_rtx)), 4))
+	    {
+	      rtx res_reg = gen_rtx_REG (SImode, REGNO (arg_rtx));
+	      if (TYPE_UNSIGNED (arg_type))
+		emit_set_insn (res_reg, gen_rtx_ZERO_EXTEND (SImode, arg_rtx));
+	      else if (known_lt (GET_MODE_SIZE (GET_MODE (arg_rtx)), 2))
+		emit_insn (gen_thumb1_extendqisi2 (res_reg, arg_rtx));
+	      else
+		emit_insn (gen_thumb1_extendhisi2 (res_reg, arg_rtx));
+	    }
+	}
+    }
+
   /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
   l_mask = live_regs_mask & 0x40ff;
   /* Then count how many other high registers will need to be pushed.  */