diff mbox series

[v3,12/20] target/riscv: implement zicfiss instructions

Message ID 20240807000652.1417776-13-debug@rivosinc.com
State New
Headers show
Series riscv support for control flow integrity extensions | expand

Commit Message

Deepak Gupta Aug. 7, 2024, 12:06 a.m. UTC
zicfiss has the following instructions
 - sspopchk: pops a value from shadow stack and compares with x1/x5.
   If they don't match, reports a sw check exception with tval = 3.
 - sspush: pushes value in x1/x5 on shadow stack
 - ssrdp: reads current shadow stack pointer
 - ssamoswap: swaps contents of shadow stack atomically

sspopchk/sspush/ssrdp default to zimop if zimop implemented and SSE=0

If SSE=0, ssamoswap raises an illegal instruction exception.

This patch implements shadow stack operations for qemu-user only; the
shadow stack memory is not protected from regular stores.

Signed-off-by: Deepak Gupta <debug@rivosinc.com>
Co-developed-by: Jim Shu <jim.shu@sifive.com>
Co-developed-by: Andy Chiu <andy.chiu@sifive.com>
---
 target/riscv/cpu_bits.h                       |   2 +
 target/riscv/insn32.decode                    |  17 +-
 target/riscv/insn_trans/trans_rva.c.inc       |  47 ++++++
 target/riscv/insn_trans/trans_rvzicfiss.c.inc | 149 ++++++++++++++++++
 target/riscv/translate.c                      |   1 +
 5 files changed, 214 insertions(+), 2 deletions(-)
 create mode 100644 target/riscv/insn_trans/trans_rvzicfiss.c.inc

Comments

Richard Henderson Aug. 7, 2024, 2:39 a.m. UTC | #1
On 8/7/24 10:06, Deepak Gupta wrote:
> zicfiss has following instructions
>   - sspopchk: pops a value from shadow stack and compares with x1/x5.
>     If they dont match, reports a sw check exception with tval = 3.
>   - sspush: pushes value in x1/x5 on shadow stack
>   - ssrdp: reads current shadow stack
>   - ssamoswap: swaps contents of shadow stack atomically
> 
> sspopchk/sspush/ssrdp default to zimop if zimop implemented and SSE=0
> 
> If SSE=0, ssamoswap is illegal instruction exception.
> 
> This patch implements shadow stack operations for qemu-user and shadow
> stack is not protected.
> 
> Signed-off-by: Deepak Gupta <debug@rivosinc.com>
> Co-developed-by: Jim Shu <jim.shu@sifive.com>
> Co-developed-by: Andy Chiu <andy.chiu@sifive.com>
> ---
>   target/riscv/cpu_bits.h                       |   2 +
>   target/riscv/insn32.decode                    |  17 +-
>   target/riscv/insn_trans/trans_rva.c.inc       |  47 ++++++
>   target/riscv/insn_trans/trans_rvzicfiss.c.inc | 149 ++++++++++++++++++
>   target/riscv/translate.c                      |   1 +
>   5 files changed, 214 insertions(+), 2 deletions(-)
>   create mode 100644 target/riscv/insn_trans/trans_rvzicfiss.c.inc
> 
> diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
> index 226157896d..5ebc4dd5b3 100644
> --- a/target/riscv/cpu_bits.h
> +++ b/target/riscv/cpu_bits.h
> @@ -702,6 +702,8 @@ typedef enum RISCVException {
>   
>   /* zicfilp defines lp violation results in sw check with tval = 2*/
>   #define RISCV_EXCP_SW_CHECK_FCFI_TVAL      2
> +/* zicfiss defines ss violation results in sw check with tval = 3*/
> +#define RISCV_EXCP_SW_CHECK_BCFI_TVAL      3
>   
>   #define RISCV_EXCP_INT_FLAG                0x80000000
>   #define RISCV_EXCP_INT_MASK                0x7fffffff
> diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
> index c963c59c8e..c59c992ce2 100644
> --- a/target/riscv/insn32.decode
> +++ b/target/riscv/insn32.decode
> @@ -65,8 +65,10 @@
>   # Formats 32:
>   @r       .......   ..... ..... ... ..... ....... &r                %rs2 %rs1 %rd
>   @i       ............    ..... ... ..... ....... &i      imm=%imm_i     %rs1 %rd
> +@ss_pop  ............    ..... ... ..... ....... &i      imm=0 %rs1 rd=0
>   @b       .......   ..... ..... ... ..... ....... &b      imm=%imm_b %rs2 %rs1
>   @s       .......   ..... ..... ... ..... ....... &s      imm=%imm_s %rs2 %rs1
> +@ss_push .......   ..... ..... ... ..... ....... &s      imm=0 %rs2 rs1=0

No need for single-use formats, or even forcing them into specific argument sets.

> +{
> +  # zicfiss instructions carved out of mop.r
> +  ssrdp      1100110 11100     00000 100 ..... 1110011 %rd
> +  sspopchk   1100110 11100     ..... 100 00000 1110011 @ss_pop

You can check x1/x5 here:

{
   [
     ssrdp     1100110 11100 00000 100 rd:5  1110011
     sspopchk  1100110 11100 00001 100 00000 1110011  rs1=1
     sspopchk  1100110 11100 00101 100 00000 1110011  rs1=5
   ]
   mop_r_n ...
}

which will make things easier for the next insn carved out of mop_r_n.


> diff --git a/target/riscv/insn_trans/trans_rva.c.inc b/target/riscv/insn_trans/trans_rva.c.inc
> index 39bbf60f3c..db6c03f6a8 100644
> --- a/target/riscv/insn_trans/trans_rva.c.inc
> +++ b/target/riscv/insn_trans/trans_rva.c.inc
> @@ -18,6 +18,8 @@
>    * this program.  If not, see <http://www.gnu.org/licenses/>.
>    */
>   
> +#include "exec/memop.h"
> +
>   #define REQUIRE_A_OR_ZAAMO(ctx) do {                      \
>       if (!ctx->cfg_ptr->ext_zaamo && !has_ext(ctx, RVA)) { \
>           return false;                                     \
> @@ -114,6 +116,28 @@ static bool trans_amoswap_w(DisasContext *ctx, arg_amoswap_w *a)
>       return gen_amo(ctx, a, &tcg_gen_atomic_xchg_tl, MO_TESL);
>   }
>   
> +static bool trans_ssamoswap_w(DisasContext *ctx, arg_amoswap_w *a)
> +{
> +    REQUIRE_A_OR_ZAAMO(ctx);
> +    /* default for qemu-user, use regular RW memory and thus mmu_idx=0 */
> +    int ss_mmu_idx = 0;
> +
> +    /* back cfi was not enabled, return false */
> +    if (!ctx->bcfi_enabled) {
> +        return false;
> +    }
> +
> +    TCGv dest = dest_gpr(ctx, a->rd);
> +    TCGv src1, src2 = get_gpr(ctx, a->rs2, EXT_NONE);
> +
> +    decode_save_opc(ctx);
> +    src1 = get_address(ctx, a->rs1, 0);
> +
> +    tcg_gen_atomic_xchg_tl(dest, src1, src2, ss_mmu_idx, (MO_ALIGN | MO_TESL));
> +    gen_set_gpr(ctx, a->rd, dest);
> +    return true;
> +}
> +
>   static bool trans_amoadd_w(DisasContext *ctx, arg_amoadd_w *a)
>   {
>       REQUIRE_A_OR_ZAAMO(ctx);
> @@ -183,6 +207,29 @@ static bool trans_amoswap_d(DisasContext *ctx, arg_amoswap_d *a)
>       return gen_amo(ctx, a, &tcg_gen_atomic_xchg_tl, MO_TEUQ);
>   }
>   
> +static bool trans_ssamoswap_d(DisasContext *ctx, arg_amoswap_w *a)
> +{
> +    REQUIRE_64BIT(ctx);
> +    REQUIRE_A_OR_ZAAMO(ctx);
> +    /* default for qemu-user, use regular RW memory and thus mmu_idx=0 */
> +    int ss_mmu_idx = 0;
> +
> +    /* back cfi was not enabled, return false */
> +    if (!ctx->bcfi_enabled) {
> +        return false;
> +    }
> +
> +    TCGv dest = dest_gpr(ctx, a->rd);
> +    TCGv src1, src2 = get_gpr(ctx, a->rs2, EXT_NONE);
> +
> +    decode_save_opc(ctx);
> +    src1 = get_address(ctx, a->rs1, 0);
> +
> +    tcg_gen_atomic_xchg_tl(dest, src1, src2, ss_mmu_idx, (MO_ALIGN | MO_TESQ));
> +    gen_set_gpr(ctx, a->rd, dest);
> +    return true;
> +}

Why are these in trans_rva.c.inc instead of in trans_rvzicfiss.c.inc?

> +static MemOp mxl_memop(DisasContext *ctx)
> +{
> +    switch (get_xl(ctx)) {
> +    case MXL_RV32:
> +        return MO_TEUL;
> +
> +    case MXL_RV64:
> +        return MO_TEUQ;
> +
> +    case MXL_RV128:
> +        return MO_TEUO;
> +
> +    default:
> +        g_assert_not_reached();
> +    }
> +}

This should be

   return (get_xl(ctx) + 1) | MO_TE;

and probably placed next to get_xlen() et al.

> +
> +static bool trans_sspopchk(DisasContext *ctx, arg_sspopchk *a)
> +{
> +    /* default for qemu-user, use regular RW memory and thus mmu_idx=0 */
> +    int ss_mmu_idx = 0;

This can't be right, since 0 is M_MODE.

> +
> +    /* sspopchk only supported on 32bit and 64bit */
> +    if (get_xl(ctx) != MXL_RV32 && get_xl(ctx) != MXL_RV64) {
> +        return false;
> +    }

Again, where is this prohibited?  Even if your implementation doesn't allow RV128 (as 
certainly it would be a separate code path here) this should be checked at startup, not 
all over the implementation.

> +    /*
> +     * get data in TCGv using get_gpr
> +     * get addr in TCGv using gen_helper_csrr on CSR_SSP
> +     * use some tcg subtract arithmetic (subtract by XLEN) on addr
> +     * perform ss store on computed address
> +     */
> +
> +    TCGv addr = tcg_temp_new();
> +    TCGLabel *skip = gen_new_label();
> +    uint32_t tmp = (get_xl(ctx) == MXL_RV64) ? 8 : 4;
> +    TCGv_i32 ssp_csr = tcg_constant_i32(CSR_SSP);
> +    TCGv data = tcg_temp_new();
> +    gen_helper_csrr(addr, tcg_env, ssp_csr);

I think you can skip the helper.  You've just validated the extension is enabled:

   tcg_gen_ld_tl(addr, tcg_env, offsetof(CPURISCVState, ssp));

> +    TCGv rs1 = get_gpr(ctx, a->rs1, EXT_NONE);
> +    tcg_gen_brcond_tl(TCG_COND_EQ, data, rs1, skip);
> +    gen_helper_raise_sw_check_excep(tcg_env,
> +        tcg_constant_tl(RISCV_EXCP_SW_CHECK_BCFI_TVAL), data, rs1);
> +    gen_set_label(skip);
> +    tcg_gen_addi_tl(addr, addr, tmp);
> +    gen_helper_csrw(tcg_env, ssp_csr, addr);

   tcg_gen_st_tl(addr, tcg_env, ...);

> +static bool trans_sspush(DisasContext *ctx, arg_sspush *a)

Same comments apply.


r~
Richard Henderson Aug. 7, 2024, 2:56 a.m. UTC | #2
On 8/7/24 12:39, Richard Henderson wrote:
>> +static bool trans_sspopchk(DisasContext *ctx, arg_sspopchk *a)
>> +{
>> +    /* default for qemu-user, use regular RW memory and thus mmu_idx=0 */
>> +    int ss_mmu_idx = 0;
> 
> This can't be right, since 0 is M_MODE.

I'm wrong about m-mode here, but "0" is certainly not right.

I strongly suspect you want "ctx->mem_idx | MMU_IDX_SS_ACCESS",
once you add that bit in a few patches.


r~
Deepak Gupta Aug. 7, 2024, 8:35 p.m. UTC | #3
On Wed, Aug 07, 2024 at 12:39:15PM +1000, Richard Henderson wrote:
>On 8/7/24 10:06, Deepak Gupta wrote:
>>zicfiss has following instructions
>>  - sspopchk: pops a value from shadow stack and compares with x1/x5.
>>    If they dont match, reports a sw check exception with tval = 3.
>>  - sspush: pushes value in x1/x5 on shadow stack
>>  - ssrdp: reads current shadow stack
>>  - ssamoswap: swaps contents of shadow stack atomically
>>
>>sspopchk/sspush/ssrdp default to zimop if zimop implemented and SSE=0
>>
>>If SSE=0, ssamoswap is illegal instruction exception.
>>
>>This patch implements shadow stack operations for qemu-user and shadow
>>stack is not protected.
>>
>>Signed-off-by: Deepak Gupta <debug@rivosinc.com>
>>Co-developed-by: Jim Shu <jim.shu@sifive.com>
>>Co-developed-by: Andy Chiu <andy.chiu@sifive.com>
>>---
>>  target/riscv/cpu_bits.h                       |   2 +
>>  target/riscv/insn32.decode                    |  17 +-
>>  target/riscv/insn_trans/trans_rva.c.inc       |  47 ++++++
>>  target/riscv/insn_trans/trans_rvzicfiss.c.inc | 149 ++++++++++++++++++
>>  target/riscv/translate.c                      |   1 +
>>  5 files changed, 214 insertions(+), 2 deletions(-)
>>  create mode 100644 target/riscv/insn_trans/trans_rvzicfiss.c.inc
>>
>>diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
>>index 226157896d..5ebc4dd5b3 100644
>>--- a/target/riscv/cpu_bits.h
>>+++ b/target/riscv/cpu_bits.h
>>@@ -702,6 +702,8 @@ typedef enum RISCVException {
>>  /* zicfilp defines lp violation results in sw check with tval = 2*/
>>  #define RISCV_EXCP_SW_CHECK_FCFI_TVAL      2
>>+/* zicfiss defines ss violation results in sw check with tval = 3*/
>>+#define RISCV_EXCP_SW_CHECK_BCFI_TVAL      3
>>  #define RISCV_EXCP_INT_FLAG                0x80000000
>>  #define RISCV_EXCP_INT_MASK                0x7fffffff
>>diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
>>index c963c59c8e..c59c992ce2 100644
>>--- a/target/riscv/insn32.decode
>>+++ b/target/riscv/insn32.decode
>>@@ -65,8 +65,10 @@
>>  # Formats 32:
>>  @r       .......   ..... ..... ... ..... ....... &r                %rs2 %rs1 %rd
>>  @i       ............    ..... ... ..... ....... &i      imm=%imm_i     %rs1 %rd
>>+@ss_pop  ............    ..... ... ..... ....... &i      imm=0 %rs1 rd=0
>>  @b       .......   ..... ..... ... ..... ....... &b      imm=%imm_b %rs2 %rs1
>>  @s       .......   ..... ..... ... ..... ....... &s      imm=%imm_s %rs2 %rs1
>>+@ss_push .......   ..... ..... ... ..... ....... &s      imm=0 %rs2 rs1=0
>
>No need for single-use formats, or even forcing them into specific argument sets.
>

Noted.

>>+{
>>+  # zicfiss instructions carved out of mop.r
>>+  ssrdp      1100110 11100     00000 100 ..... 1110011 %rd
>>+  sspopchk   1100110 11100     ..... 100 00000 1110011 @ss_pop
>
>You can check x1/x5 here:
>
>{
>  [
>    ssrdp     1100110 11100 00000 100 rd:5  1110011
>    sspopchk  1100110 11100 00001 100 00000 1110011  rs1=1
>    sspopchk  1100110 11100 00101 100 00000 1110011  rs1=5
>  ]
>  mop_r_n ...
>}
>
>which will make things easier for the next insn carved out of mop_r_n.
>

Will fix as you suggest.

>
>>diff --git a/target/riscv/insn_trans/trans_rva.c.inc b/target/riscv/insn_trans/trans_rva.c.inc
>>index 39bbf60f3c..db6c03f6a8 100644
>>--- a/target/riscv/insn_trans/trans_rva.c.inc
>>+++ b/target/riscv/insn_trans/trans_rva.c.inc
>>@@ -18,6 +18,8 @@
>>   * this program.  If not, see <http://www.gnu.org/licenses/>.
>>   */
>>+#include "exec/memop.h"
>>+
>>  #define REQUIRE_A_OR_ZAAMO(ctx) do {                      \
>>      if (!ctx->cfg_ptr->ext_zaamo && !has_ext(ctx, RVA)) { \
>>          return false;                                     \
>>@@ -114,6 +116,28 @@ static bool trans_amoswap_w(DisasContext *ctx, arg_amoswap_w *a)
>>      return gen_amo(ctx, a, &tcg_gen_atomic_xchg_tl, MO_TESL);
>>  }
>>+static bool trans_ssamoswap_w(DisasContext *ctx, arg_amoswap_w *a)
>>+{
>>+    REQUIRE_A_OR_ZAAMO(ctx);
>>+    /* default for qemu-user, use regular RW memory and thus mmu_idx=0 */
>>+    int ss_mmu_idx = 0;
>>+
>>+    /* back cfi was not enabled, return false */
>>+    if (!ctx->bcfi_enabled) {
>>+        return false;
>>+    }
>>+
>>+    TCGv dest = dest_gpr(ctx, a->rd);
>>+    TCGv src1, src2 = get_gpr(ctx, a->rs2, EXT_NONE);
>>+
>>+    decode_save_opc(ctx);
>>+    src1 = get_address(ctx, a->rs1, 0);
>>+
>>+    tcg_gen_atomic_xchg_tl(dest, src1, src2, ss_mmu_idx, (MO_ALIGN | MO_TESL));
>>+    gen_set_gpr(ctx, a->rd, dest);
>>+    return true;
>>+}
>>+
>>  static bool trans_amoadd_w(DisasContext *ctx, arg_amoadd_w *a)
>>  {
>>      REQUIRE_A_OR_ZAAMO(ctx);
>>@@ -183,6 +207,29 @@ static bool trans_amoswap_d(DisasContext *ctx, arg_amoswap_d *a)
>>      return gen_amo(ctx, a, &tcg_gen_atomic_xchg_tl, MO_TEUQ);
>>  }
>>+static bool trans_ssamoswap_d(DisasContext *ctx, arg_amoswap_w *a)
>>+{
>>+    REQUIRE_64BIT(ctx);
>>+    REQUIRE_A_OR_ZAAMO(ctx);
>>+    /* default for qemu-user, use regular RW memory and thus mmu_idx=0 */
>>+    int ss_mmu_idx = 0;
>>+
>>+    /* back cfi was not enabled, return false */
>>+    if (!ctx->bcfi_enabled) {
>>+        return false;
>>+    }
>>+
>>+    TCGv dest = dest_gpr(ctx, a->rd);
>>+    TCGv src1, src2 = get_gpr(ctx, a->rs2, EXT_NONE);
>>+
>>+    decode_save_opc(ctx);
>>+    src1 = get_address(ctx, a->rs1, 0);
>>+
>>+    tcg_gen_atomic_xchg_tl(dest, src1, src2, ss_mmu_idx, (MO_ALIGN | MO_TESQ));
>>+    gen_set_gpr(ctx, a->rd, dest);
>>+    return true;
>>+}
>
>Why are these in trans_rva.c.inc instead of in trans_rvzicfiss.c.inc?

The encodings are carved out of existing unused atomic encodings, which is why
zicfiss depends on the 'A' extension.

However, if shadow stacks are not enabled for the execution environment, then it
should be an illegal instruction.

I am fine placing it in trans_rvzicfiss.c.inc as well. Let me know.
>
>>+static MemOp mxl_memop(DisasContext *ctx)
>>+{
>>+    switch (get_xl(ctx)) {
>>+    case MXL_RV32:
>>+        return MO_TEUL;
>>+
>>+    case MXL_RV64:
>>+        return MO_TEUQ;
>>+
>>+    case MXL_RV128:
>>+        return MO_TEUO;
>>+
>>+    default:
>>+        g_assert_not_reached();
>>+    }
>>+}
>
>This should be
>
>  return (get_xl(ctx) + 1) | MO_TE;
>
>and probably placed next to get_xlen() et al.

Noted.

>
>>+
>>+static bool trans_sspopchk(DisasContext *ctx, arg_sspopchk *a)
>>+{
>>+    /* default for qemu-user, use regular RW memory and thus mmu_idx=0 */
>>+    int ss_mmu_idx = 0;
>
>This can't be right, since 0 is M_MODE.

This is 0 only for qemu-user.
When not qemu-user, it is obtained differently.

>
>>+
>>+    /* sspopchk only supported on 32bit and 64bit */
>>+    if (get_xl(ctx) != MXL_RV32 && get_xl(ctx) != MXL_RV64) {
>>+        return false;
>>+    }
>
>Again, where is this prohibited?  Even if your implementation doesn't 
>allow RV128 (as certainly it would be a separate code path here) this 
>should be checked at startup, not all over the implementation.
>

It's a leftover from when I was starting out and didn't know a lot about qemu (still don't :-))
and RISC-V.

Will remove this and at other places as well.

>>+    /*
>>+     * get data in TCGv using get_gpr
>>+     * get addr in TCGv using gen_helper_csrr on CSR_SSP
>>+     * use some tcg subtract arithmetic (subtract by XLEN) on addr
>>+     * perform ss store on computed address
>>+     */
>>+
>>+    TCGv addr = tcg_temp_new();
>>+    TCGLabel *skip = gen_new_label();
>>+    uint32_t tmp = (get_xl(ctx) == MXL_RV64) ? 8 : 4;
>>+    TCGv_i32 ssp_csr = tcg_constant_i32(CSR_SSP);
>>+    TCGv data = tcg_temp_new();
>>+    gen_helper_csrr(addr, tcg_env, ssp_csr);
>
>I think you can skip the helper.  You've just validated the extension is enabled:
>
>  tcg_gen_ld_tl(addr, tcg_env, offsetof(CPURISCVState, ssp));

Yeah that's right, will do that.

>
>>+    TCGv rs1 = get_gpr(ctx, a->rs1, EXT_NONE);
>>+    tcg_gen_brcond_tl(TCG_COND_EQ, data, rs1, skip);
>>+    gen_helper_raise_sw_check_excep(tcg_env,
>>+        tcg_constant_tl(RISCV_EXCP_SW_CHECK_BCFI_TVAL), data, rs1);
>>+    gen_set_label(skip);
>>+    tcg_gen_addi_tl(addr, addr, tmp);
>>+    gen_helper_csrw(tcg_env, ssp_csr, addr);
>
>  tcg_gen_st_tl(addr, tcg_env, ...);
>
>>+static bool trans_sspush(DisasContext *ctx, arg_sspush *a)
>
>Same comments apply.
>
>
>r~
Deepak Gupta Aug. 7, 2024, 9:25 p.m. UTC | #4
On Wed, Aug 07, 2024 at 12:56:46PM +1000, Richard Henderson wrote:
>On 8/7/24 12:39, Richard Henderson wrote:
>>>+static bool trans_sspopchk(DisasContext *ctx, arg_sspopchk *a)
>>>+{
>>>+    /* default for qemu-user, use regular RW memory and thus mmu_idx=0 */
>>>+    int ss_mmu_idx = 0;
>>
>>This can't be right, since 0 is M_MODE.
>
>I'm wrong about m-mode here, but "0" is certainly not right.

I followed `riscv_env_mmu_index` here. If CONFIG_USER_ONLY, it returns 0.
For qemu-user, I didn't bother to protect shadow stack from normal stores.
And simply used index 0.

>
>I strongly suspect you want "ctx->mem_idx | MMU_IDX_SS_ACCESS",
>once you add that bit in a few patches.
>
>
>r~
diff mbox series

Patch

diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index 226157896d..5ebc4dd5b3 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -702,6 +702,8 @@  typedef enum RISCVException {
 
 /* zicfilp defines lp violation results in sw check with tval = 2*/
 #define RISCV_EXCP_SW_CHECK_FCFI_TVAL      2
+/* zicfiss defines ss violation results in sw check with tval = 3*/
+#define RISCV_EXCP_SW_CHECK_BCFI_TVAL      3
 
 #define RISCV_EXCP_INT_FLAG                0x80000000
 #define RISCV_EXCP_INT_MASK                0x7fffffff
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index c963c59c8e..c59c992ce2 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -65,8 +65,10 @@ 
 # Formats 32:
 @r       .......   ..... ..... ... ..... ....... &r                %rs2 %rs1 %rd
 @i       ............    ..... ... ..... ....... &i      imm=%imm_i     %rs1 %rd
+@ss_pop  ............    ..... ... ..... ....... &i      imm=0 %rs1 rd=0
 @b       .......   ..... ..... ... ..... ....... &b      imm=%imm_b %rs2 %rs1
 @s       .......   ..... ..... ... ..... ....... &s      imm=%imm_s %rs2 %rs1
+@ss_push .......   ..... ..... ... ..... ....... &s      imm=0 %rs2 rs1=0
 @u       ....................      ..... ....... &u      imm=%imm_u          %rd
 @j       ....................      ..... ....... &j      imm=%imm_j          %rd
 
@@ -247,6 +249,7 @@  remud    0000001 .....  ..... 111 ..... 1111011 @r
 lr_w       00010 . . 00000 ..... 010 ..... 0101111 @atom_ld
 sc_w       00011 . . ..... ..... 010 ..... 0101111 @atom_st
 amoswap_w  00001 . . ..... ..... 010 ..... 0101111 @atom_st
+ssamoswap_w 01001 . . ..... ..... 010 ..... 0101111 @atom_st
 amoadd_w   00000 . . ..... ..... 010 ..... 0101111 @atom_st
 amoxor_w   00100 . . ..... ..... 010 ..... 0101111 @atom_st
 amoand_w   01100 . . ..... ..... 010 ..... 0101111 @atom_st
@@ -260,6 +263,7 @@  amomaxu_w  11100 . . ..... ..... 010 ..... 0101111 @atom_st
 lr_d       00010 . . 00000 ..... 011 ..... 0101111 @atom_ld
 sc_d       00011 . . ..... ..... 011 ..... 0101111 @atom_st
 amoswap_d  00001 . . ..... ..... 011 ..... 0101111 @atom_st
+ssamoswap_d 01001 . . ..... ..... 011 ..... 0101111 @atom_st
 amoadd_d   00000 . . ..... ..... 011 ..... 0101111 @atom_st
 amoxor_d   00100 . . ..... ..... 011 ..... 0101111 @atom_st
 amoand_d   01100 . . ..... ..... 011 ..... 0101111 @atom_st
@@ -1023,8 +1027,17 @@  amocas_d    00101 . . ..... ..... 011 ..... 0101111 @atom_st
 amocas_q    00101 . . ..... ..... 100 ..... 0101111 @atom_st
 
 # *** Zimop may-be-operation extension ***
-mop_r_n     1 . 00 .. 0111 .. ..... 100 ..... 1110011 @mop5
-mop_rr_n    1 . 00 .. 1 ..... ..... 100 ..... 1110011 @mop3
+{
+  # zicfiss instructions carved out of mop.r
+  ssrdp      1100110 11100     00000 100 ..... 1110011 %rd
+  sspopchk   1100110 11100     ..... 100 00000 1110011 @ss_pop
+  mop_r_n    1 . 00 .. 0111 .. ..... 100 ..... 1110011 @mop5
+}
+{
+  # zicfiss instruction carved out of mop.rr
+  sspush     1100111 .....     00000 100 00000 1110011 @ss_push
+  mop_rr_n   1 . 00 .. 1 ..... ..... 100 ..... 1110011 @mop3
+}
 
 # *** Zabhb Standard Extension ***
 amoswap_b  00001 . . ..... ..... 000 ..... 0101111 @atom_st
diff --git a/target/riscv/insn_trans/trans_rva.c.inc b/target/riscv/insn_trans/trans_rva.c.inc
index 39bbf60f3c..db6c03f6a8 100644
--- a/target/riscv/insn_trans/trans_rva.c.inc
+++ b/target/riscv/insn_trans/trans_rva.c.inc
@@ -18,6 +18,8 @@ 
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include "exec/memop.h"
+
 #define REQUIRE_A_OR_ZAAMO(ctx) do {                      \
     if (!ctx->cfg_ptr->ext_zaamo && !has_ext(ctx, RVA)) { \
         return false;                                     \
@@ -114,6 +116,28 @@  static bool trans_amoswap_w(DisasContext *ctx, arg_amoswap_w *a)
     return gen_amo(ctx, a, &tcg_gen_atomic_xchg_tl, MO_TESL);
 }
 
+static bool trans_ssamoswap_w(DisasContext *ctx, arg_amoswap_w *a)
+{
+    REQUIRE_A_OR_ZAAMO(ctx);
+    /* ssamoswap_w, qemu-user default: regular RW memory, thus mmu_idx=0 */
+    int ss_mmu_idx = 0;
+
+    /* back cfi (shadow stack) is not enabled: return false, not translated */
+    if (!ctx->bcfi_enabled) {
+        return false;
+    }
+
+    TCGv dest = dest_gpr(ctx, a->rd);
+    TCGv src1, src2 = get_gpr(ctx, a->rs2, EXT_NONE);
+
+    decode_save_opc(ctx);
+    src1 = get_address(ctx, a->rs1, 0);
+    /* atomic exchange of rs2 with memory at address rs1; result goes to rd */
+    tcg_gen_atomic_xchg_tl(dest, src1, src2, ss_mmu_idx, (MO_ALIGN | MO_TESL));
+    gen_set_gpr(ctx, a->rd, dest);
+    return true;
+}
+
 static bool trans_amoadd_w(DisasContext *ctx, arg_amoadd_w *a)
 {
     REQUIRE_A_OR_ZAAMO(ctx);
@@ -183,6 +207,29 @@  static bool trans_amoswap_d(DisasContext *ctx, arg_amoswap_d *a)
     return gen_amo(ctx, a, &tcg_gen_atomic_xchg_tl, MO_TEUQ);
 }
 
+static bool trans_ssamoswap_d(DisasContext *ctx, arg_amoswap_w *a)
+{
+    REQUIRE_64BIT(ctx);
+    REQUIRE_A_OR_ZAAMO(ctx);
+    /* ssamoswap_d, qemu-user default: regular RW memory, thus mmu_idx=0 */
+    int ss_mmu_idx = 0;
+    /* NOTE(review): arg_amoswap_w is used for this _d variant - confirm */
+    /* back cfi (shadow stack) is not enabled: return false, not translated */
+    if (!ctx->bcfi_enabled) {
+        return false;
+    }
+
+    TCGv dest = dest_gpr(ctx, a->rd);
+    TCGv src1, src2 = get_gpr(ctx, a->rs2, EXT_NONE);
+
+    decode_save_opc(ctx);
+    src1 = get_address(ctx, a->rs1, 0);
+    /* atomic exchange of rs2 with memory at address rs1; result goes to rd */
+    tcg_gen_atomic_xchg_tl(dest, src1, src2, ss_mmu_idx, (MO_ALIGN | MO_TESQ));
+    gen_set_gpr(ctx, a->rd, dest);
+    return true;
+}
+
 static bool trans_amoadd_d(DisasContext *ctx, arg_amoadd_d *a)
 {
     REQUIRE_64BIT(ctx);
diff --git a/target/riscv/insn_trans/trans_rvzicfiss.c.inc b/target/riscv/insn_trans/trans_rvzicfiss.c.inc
new file mode 100644
index 0000000000..c538b7ad99
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rvzicfiss.c.inc
@@ -0,0 +1,149 @@ 
+/*
+ * RISC-V translation routines for the Control-Flow Integrity Extension
+ *
+ * Copyright (c) 2024 Rivos Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+static MemOp mxl_memop(DisasContext *ctx)
+{
+    switch (get_xl(ctx)) { /* pick the MemOp that goes with the current XL */
+    case MXL_RV32:
+        return MO_TEUL;
+
+    case MXL_RV64:
+        return MO_TEUQ;
+
+    case MXL_RV128:
+        return MO_TEUO;
+
+    default: /* no other XL value is expected here */
+        g_assert_not_reached();
+    }
+}
+
+static bool trans_sspopchk(DisasContext *ctx, arg_sspopchk *a)
+{
+    /* default for qemu-user, use regular RW memory and thus mmu_idx=0 */
+    int ss_mmu_idx = 0;
+
+    /* only RV32 and RV64 are handled here; any other XL is not translated */
+    if (get_xl(ctx) != MXL_RV32 && get_xl(ctx) != MXL_RV64) {
+        return false;
+    }
+
+    /* back cfi (shadow stack) is not enabled: return false, not translated */
+    if (!ctx->bcfi_enabled) {
+        return false;
+    }
+
+    /*
+     * sspopchk can only compare against x1 or x5. Any other rs1 is left to
+     * the zimop decoding.
+     */
+
+    if (a->rs1 != 1 && a->rs1 != 5) {
+        return false;
+    }
+
+    /*
+     * get the shadow stack pointer in addr using gen_helper_csrr on CSR_SSP
+     * perform ss load from addr into data and compare it with rs1
+     * on mismatch raise sw check exception with tval = 3 (BCFI)
+     * on match add 4 (RV32) or 8 (RV64) to addr and write it back to CSR_SSP
+     */
+
+    TCGv addr = tcg_temp_new();
+    TCGLabel *skip = gen_new_label();
+    uint32_t tmp = (get_xl(ctx) == MXL_RV64) ? 8 : 4;
+    TCGv_i32 ssp_csr = tcg_constant_i32(CSR_SSP);
+    TCGv data = tcg_temp_new();
+    gen_helper_csrr(addr, tcg_env, ssp_csr);
+
+    tcg_gen_qemu_ld_tl(data, addr, ss_mmu_idx,
+                       mxl_memop(ctx) | MO_ALIGN);
+    TCGv rs1 = get_gpr(ctx, a->rs1, EXT_NONE);
+    tcg_gen_brcond_tl(TCG_COND_EQ, data, rs1, skip);
+    gen_helper_raise_sw_check_excep(tcg_env,
+        tcg_constant_tl(RISCV_EXCP_SW_CHECK_BCFI_TVAL), data, rs1);
+    gen_set_label(skip);
+    tcg_gen_addi_tl(addr, addr, tmp);
+    gen_helper_csrw(tcg_env, ssp_csr, addr);
+
+    return true;
+}
+
+static bool trans_sspush(DisasContext *ctx, arg_sspush *a)
+{
+    /* default for qemu-user, use regular RW memory and thus mmu_idx=0 */
+    int ss_mmu_idx = 0;
+
+    /* only RV32 and RV64 are handled here; any other XL is not translated */
+    if (get_xl(ctx) != MXL_RV32 && get_xl(ctx) != MXL_RV64) {
+        return false;
+    }
+
+    /* back cfi (shadow stack) is not enabled: return false, not translated */
+    if (!ctx->bcfi_enabled) {
+        return false;
+    }
+
+    /*
+     * sspush can only push from x1 or x5. Any other rs2 is left to zimop
+     */
+    if (a->rs2 != 1 && a->rs2 != 5) {
+        return false;
+    }
+
+    /*
+     * get data (rs2) in TCGv using get_gpr
+     * get the shadow stack pointer in addr using gen_helper_csrr on CSR_SSP
+     * subtract 4 (RV32) or 8 (RV64) from addr
+     * perform ss store of data at addr, then write addr back to CSR_SSP
+     */
+
+    TCGv addr = tcg_temp_new();
+    int tmp = (get_xl(ctx) == MXL_RV64) ? -8 : -4;
+    TCGv_i32 ssp_csr = tcg_constant_i32(CSR_SSP);
+    TCGv data = get_gpr(ctx, a->rs2, EXT_NONE);
+    gen_helper_csrr(addr, tcg_env, ssp_csr);
+
+    tcg_gen_addi_tl(addr, addr, tmp);
+
+    tcg_gen_qemu_st_tl(data, addr, ss_mmu_idx,
+                       mxl_memop(ctx) | MO_ALIGN);
+    gen_helper_csrw(tcg_env, ssp_csr, addr);
+
+    return true;
+}
+
+static bool trans_ssrdp(DisasContext *ctx, arg_ssrdp *a)
+{
+    /* ssrdp: read CSR_SSP into rd; only RV32 and RV64 are handled */
+    if (get_xl(ctx) != MXL_RV32 && get_xl(ctx) != MXL_RV64) {
+        return false;
+    }
+
+    /* back cfi (shadow stack) is not enabled: return false, not translated */
+    if (!ctx->bcfi_enabled) {
+        return false;
+    }
+    /* rd is written here, so take it with dest_gpr (get_gpr is for sources) */
+    TCGv dest = dest_gpr(ctx, a->rd);
+    TCGv_i32 ssp_csr = tcg_constant_i32(CSR_SSP);
+    gen_helper_csrr(dest, tcg_env, ssp_csr);
+    gen_set_gpr(ctx, a->rd, dest);
+
+    return true;
+}
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index b0526f5d79..de375c32a1 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -1142,6 +1142,7 @@  static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc)
 #include "insn_trans/trans_rvzawrs.c.inc"
 #include "insn_trans/trans_rvzicbo.c.inc"
 #include "insn_trans/trans_rvzimop.c.inc"
+#include "insn_trans/trans_rvzicfiss.c.inc"
 #include "insn_trans/trans_rvzfa.c.inc"
 #include "insn_trans/trans_rvzfh.c.inc"
 #include "insn_trans/trans_rvk.c.inc"