diff mbox series

Hexagon: move GETPC() calls to top level helpers

Message ID d40fabcf9d6e92e4cd8d6a144e9b2a9acf4580dc.1688420966.git.quic_mathbern@quicinc.com
State New
Headers show
Series Hexagon: move GETPC() calls to top level helpers | expand

Commit Message

Matheus Tavares Bernardino July 3, 2023, 9:49 p.m. UTC
As docs/devel/loads-stores.rst states:

  ``GETPC()`` should be used with great care: calling
  it in other functions that are *not* the top level
  ``HELPER(foo)`` will cause unexpected behavior. Instead, the
  value of ``GETPC()`` should be read from the helper and passed
  if needed to the functions that the helper calls.

Let's fix the GETPC() usage in Hexagon, making sure it's always called
from top level helpers and passed down to the places where it's
needed. There are two snippets where that is not currently the case:

- probe_store(), which is only called from two helpers, so it's easy to
  move GETPC() up.

- mem_load*() functions, which are also called directly from helpers,
  but through the MEM_LOAD*() set of macros. Note that this are only
  used when compiling with --disable-hexagon-idef-parser.

  In this case, we also take this opportunity to simplify the code,
  unifying the mem_load*() functions.

Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com>
---
 target/hexagon/macros.h    | 22 ++++++++++-------
 target/hexagon/op_helper.h | 11 ++-------
 target/hexagon/op_helper.c | 49 +++++++-------------------------------
 3 files changed, 25 insertions(+), 57 deletions(-)

Comments

Brian Cain July 3, 2023, 9:52 p.m. UTC | #1
> -----Original Message-----
> From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com>
> Sent: Monday, July 3, 2023 4:50 PM
> To: qemu-devel@nongnu.org
> Cc: Brian Cain <bcain@quicinc.com>; Marco Liebel (QUIC)
> <quic_mliebel@quicinc.com>; ltaylorsimpson@gmail.com
> Subject: [PATCH] Hexagon: move GETPC() calls to top level helpers
> 
> As docs/devel/loads-stores.rst states:
> 
>   ``GETPC()`` should be used with great care: calling
>   it in other functions that are *not* the top level
>   ``HELPER(foo)`` will cause unexpected behavior. Instead, the
>   value of ``GETPC()`` should be read from the helper and passed
>   if needed to the functions that the helper calls.
> 
> Let's fix the GETPC() usage in Hexagon, making sure it's always called
> from top level helpers and passed down to the places where it's
> needed. There are two snippets where that is not currently the case:
> 
> - probe_store(), which is only called from two helpers, so it's easy to
>   move GETPC() up.
> 
> - mem_load*() functions, which are also called directly from helpers,
>   but through the MEM_LOAD*() set of macros. Note that this are only
>   used when compiling with --disable-hexagon-idef-parser.
> 
>   In this case, we also take this opportunity to simplify the code,
>   unifying the mem_load*() functions.
> 
> Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com>
> ---
>  target/hexagon/macros.h    | 22 ++++++++++-------
>  target/hexagon/op_helper.h | 11 ++-------
>  target/hexagon/op_helper.c | 49 +++++++-------------------------------
>  3 files changed, 25 insertions(+), 57 deletions(-)
> 
> diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h
> index 5451b061ee..efb8013912 100644
> --- a/target/hexagon/macros.h
> +++ b/target/hexagon/macros.h
> @@ -173,14 +173,20 @@
>  #define MEM_STORE8(VA, DATA, SLOT) \
>      MEM_STORE8_FUNC(DATA)(cpu_env, VA, DATA, SLOT)
>  #else
> -#define MEM_LOAD1s(VA) ((int8_t)mem_load1(env, pkt_has_store_s1, slot,
> VA))
> -#define MEM_LOAD1u(VA) ((uint8_t)mem_load1(env, pkt_has_store_s1, slot,
> VA))
> -#define MEM_LOAD2s(VA) ((int16_t)mem_load2(env, pkt_has_store_s1, slot,
> VA))
> -#define MEM_LOAD2u(VA) ((uint16_t)mem_load2(env, pkt_has_store_s1, slot,
> VA))
> -#define MEM_LOAD4s(VA) ((int32_t)mem_load4(env, pkt_has_store_s1, slot,
> VA))
> -#define MEM_LOAD4u(VA) ((uint32_t)mem_load4(env, pkt_has_store_s1, slot,
> VA))
> -#define MEM_LOAD8s(VA) ((int64_t)mem_load8(env, pkt_has_store_s1, slot,
> VA))
> -#define MEM_LOAD8u(VA) ((uint64_t)mem_load8(env, pkt_has_store_s1, slot,
> VA))
> +
> +#define MEM_LOADn(SIZE, VA) ({ \
> +    check_noshuf(env, pkt_has_store_s1, slot, VA, SIZE); \
> +    cpu_ldub_data_ra(env, VA, GETPC()); \
> +})
> +
> +#define MEM_LOAD1s(VA) ((int8_t)MEM_LOADn(1, VA))
> +#define MEM_LOAD1u(VA) ((uint8_t)MEM_LOADn(1, VA))
> +#define MEM_LOAD2s(VA) ((int16_t)MEM_LOADn(2, VA))
> +#define MEM_LOAD2u(VA) ((uint16_t)MEM_LOADn(2, VA))
> +#define MEM_LOAD4s(VA) ((int32_t)MEM_LOADn(4, VA))
> +#define MEM_LOAD4u(VA) ((uint32_t)MEM_LOADn(4, VA))
> +#define MEM_LOAD8s(VA) ((int64_t)MEM_LOADn(8, VA))
> +#define MEM_LOAD8u(VA) ((uint64_t)MEM_LOADn(8, VA))
> 
>  #define MEM_STORE1(VA, DATA, SLOT) log_store32(env, VA, DATA, 1, SLOT)
>  #define MEM_STORE2(VA, DATA, SLOT) log_store32(env, VA, DATA, 2, SLOT)
> diff --git a/target/hexagon/op_helper.h b/target/hexagon/op_helper.h
> index 8f3764d15e..845c3d197e 100644
> --- a/target/hexagon/op_helper.h
> +++ b/target/hexagon/op_helper.h
> @@ -19,15 +19,8 @@
>  #define HEXAGON_OP_HELPER_H
> 
>  /* Misc functions */
> -uint8_t mem_load1(CPUHexagonState *env, bool pkt_has_store_s1,
> -                  uint32_t slot, target_ulong vaddr);
> -uint16_t mem_load2(CPUHexagonState *env, bool pkt_has_store_s1,
> -                   uint32_t slot, target_ulong vaddr);
> -uint32_t mem_load4(CPUHexagonState *env, bool pkt_has_store_s1,
> -                   uint32_t slot, target_ulong vaddr);
> -uint64_t mem_load8(CPUHexagonState *env, bool pkt_has_store_s1,
> -                   uint32_t slot, target_ulong vaddr);
> -
> +void check_noshuf(CPUHexagonState *env, bool pkt_has_store_s1,
> +                  uint32_t slot, target_ulong vaddr, int size);
>  void log_store64(CPUHexagonState *env, target_ulong addr,
>                   int64_t val, int width, int slot);
>  void log_store32(CPUHexagonState *env, target_ulong addr,
> diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
> index 12967ac21e..1bc9c7fc2e 100644
> --- a/target/hexagon/op_helper.c
> +++ b/target/hexagon/op_helper.c
> @@ -467,13 +467,12 @@ int32_t HELPER(cabacdecbin_pred)(int64_t RssV,
> int64_t RttV)
>  }
> 
>  static void probe_store(CPUHexagonState *env, int slot, int mmu_idx,
> -                        bool is_predicated)
> +                        bool is_predicated, uintptr_t retaddr)
>  {
>      if (!is_predicated || !(env->slot_cancelled & (1 << slot))) {
>          size1u_t width = env->mem_log_stores[slot].width;
>          target_ulong va = env->mem_log_stores[slot].va;
> -        uintptr_t ra = GETPC();
> -        probe_write(env, va, width, mmu_idx, ra);
> +        probe_write(env, va, width, mmu_idx, retaddr);
>      }
>  }
> 
> @@ -494,7 +493,8 @@ void
> HELPER(probe_pkt_scalar_store_s0)(CPUHexagonState *env, int args)
>      int mmu_idx = FIELD_EX32(args, PROBE_PKT_SCALAR_STORE_S0,
> MMU_IDX);
>      bool is_predicated =
>          FIELD_EX32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED);
> -    probe_store(env, 0, mmu_idx, is_predicated);
> +    uintptr_t ra = GETPC();
> +    probe_store(env, 0, mmu_idx, is_predicated, ra);
>  }
> 
>  void HELPER(probe_hvx_stores)(CPUHexagonState *env, int mmu_idx)
> @@ -547,12 +547,13 @@ void
> HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask)
>      bool s0_is_pred = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES,
> S0_IS_PRED);
>      bool s1_is_pred = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES,
> S1_IS_PRED);
>      int mmu_idx = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES,
> MMU_IDX);
> +    uintptr_t ra = GETPC();
> 
>      if (has_st0) {
> -        probe_store(env, 0, mmu_idx, s0_is_pred);
> +        probe_store(env, 0, mmu_idx, s0_is_pred, ra);
>      }
>      if (has_st1) {
> -        probe_store(env, 1, mmu_idx, s1_is_pred);
> +        probe_store(env, 1, mmu_idx, s1_is_pred, ra);
>      }
>      if (has_hvx_stores) {
>          HELPER(probe_hvx_stores)(env, mmu_idx);
> @@ -566,8 +567,8 @@ void
> HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask)
>   * If the load is in slot 0 and there is a store in slot1 (that
>   * wasn't cancelled), we have to do the store first.
>   */
> -static void check_noshuf(CPUHexagonState *env, bool pkt_has_store_s1,
> -                         uint32_t slot, target_ulong vaddr, int size)
> +void check_noshuf(CPUHexagonState *env, bool pkt_has_store_s1,
> +                  uint32_t slot, target_ulong vaddr, int size)
>  {
>      if (slot == 0 && pkt_has_store_s1 &&
>          ((env->slot_cancelled & (1 << 1)) == 0)) {
> @@ -576,38 +577,6 @@ static void check_noshuf(CPUHexagonState *env,
> bool pkt_has_store_s1,
>      }
>  }
> 
> -uint8_t mem_load1(CPUHexagonState *env, bool pkt_has_store_s1,
> -                  uint32_t slot, target_ulong vaddr)
> -{
> -    uintptr_t ra = GETPC();
> -    check_noshuf(env, pkt_has_store_s1, slot, vaddr, 1);
> -    return cpu_ldub_data_ra(env, vaddr, ra);
> -}
> -
> -uint16_t mem_load2(CPUHexagonState *env, bool pkt_has_store_s1,
> -                   uint32_t slot, target_ulong vaddr)
> -{
> -    uintptr_t ra = GETPC();
> -    check_noshuf(env, pkt_has_store_s1, slot, vaddr, 2);
> -    return cpu_lduw_data_ra(env, vaddr, ra);
> -}
> -
> -uint32_t mem_load4(CPUHexagonState *env, bool pkt_has_store_s1,
> -                   uint32_t slot, target_ulong vaddr)
> -{
> -    uintptr_t ra = GETPC();
> -    check_noshuf(env, pkt_has_store_s1, slot, vaddr, 4);
> -    return cpu_ldl_data_ra(env, vaddr, ra);
> -}
> -
> -uint64_t mem_load8(CPUHexagonState *env, bool pkt_has_store_s1,
> -                   uint32_t slot, target_ulong vaddr)
> -{
> -    uintptr_t ra = GETPC();
> -    check_noshuf(env, pkt_has_store_s1, slot, vaddr, 8);
> -    return cpu_ldq_data_ra(env, vaddr, ra);
> -}
> -
>  /* Floating point */
>  float64 HELPER(conv_sf2df)(CPUHexagonState *env, float32 RsV)
>  {
> --
> 2.37.2


Reviewed-by: Brian Cain <bcain@quicinc.com>
Matheus Tavares Bernardino July 4, 2023, 8:34 p.m. UTC | #2
> Matheus Tavares Bernardino <quic_mathbern@quicinc.com> wrote:
>
> diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h
> index 5451b061ee..efb8013912 100644
> --- a/target/hexagon/macros.h
> +++ b/target/hexagon/macros.h
> @@ -173,14 +173,20 @@
>  #define MEM_STORE8(VA, DATA, SLOT) \
>      MEM_STORE8_FUNC(DATA)(cpu_env, VA, DATA, SLOT)
>  #else
> -#define MEM_LOAD1s(VA) ((int8_t)mem_load1(env, pkt_has_store_s1, slot, VA))
> -#define MEM_LOAD1u(VA) ((uint8_t)mem_load1(env, pkt_has_store_s1, slot, VA))
> -#define MEM_LOAD2s(VA) ((int16_t)mem_load2(env, pkt_has_store_s1, slot, VA))
> -#define MEM_LOAD2u(VA) ((uint16_t)mem_load2(env, pkt_has_store_s1, slot, VA))
> -#define MEM_LOAD4s(VA) ((int32_t)mem_load4(env, pkt_has_store_s1, slot, VA))
> -#define MEM_LOAD4u(VA) ((uint32_t)mem_load4(env, pkt_has_store_s1, slot, VA))
> -#define MEM_LOAD8s(VA) ((int64_t)mem_load8(env, pkt_has_store_s1, slot, VA))
> -#define MEM_LOAD8u(VA) ((uint64_t)mem_load8(env, pkt_has_store_s1, slot, VA))
> +
> +#define MEM_LOADn(SIZE, VA) ({ \
> +    check_noshuf(env, pkt_has_store_s1, slot, VA, SIZE); \
> +    cpu_ldub_data_ra(env, VA, GETPC()); \
> +})
> +
> +#define MEM_LOAD1s(VA) ((int8_t)MEM_LOADn(1, VA))
> +#define MEM_LOAD1u(VA) ((uint8_t)MEM_LOADn(1, VA))
> +#define MEM_LOAD2s(VA) ((int16_t)MEM_LOADn(2, VA))
> +#define MEM_LOAD2u(VA) ((uint16_t)MEM_LOADn(2, VA))
> +#define MEM_LOAD4s(VA) ((int32_t)MEM_LOADn(4, VA))
> +#define MEM_LOAD4u(VA) ((uint32_t)MEM_LOADn(4, VA))
> +#define MEM_LOAD8s(VA) ((int64_t)MEM_LOADn(8, VA))
> +#define MEM_LOAD8u(VA) ((uint64_t)MEM_LOADn(8, VA))

Oops, an oversight from my side: this simplification is not correct
since the mem_load*() functions all call different variants of
cpu_ld*_data_ra(). I'll send a v2 correcting that.
Taylor Simpson July 4, 2023, 10 p.m. UTC | #3
-----Original Message-----
From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> 
Sent: Monday, July 3, 2023 3:50 PM
To: qemu-devel@nongnu.org
Cc: bcain@quicinc.com; quic_mliebel@quicinc.com; ltaylorsimpson@gmail.com
Subject: [PATCH] Hexagon: move GETPC() calls to top level helpers

As docs/devel/loads-stores.rst states:

  ``GETPC()`` should be used with great care: calling
  it in other functions that are *not* the top level
  ``HELPER(foo)`` will cause unexpected behavior. Instead, the
  value of ``GETPC()`` should be read from the helper and passed
  if needed to the functions that the helper calls.

Let's fix the GETPC() usage in Hexagon, making sure it's always called from
top level helpers and passed down to the places where it's needed. There are
two snippets where that is not currently the case:

- probe_store(), which is only called from two helpers, so it's easy to
  move GETPC() up.

- mem_load*() functions, which are also called directly from helpers,
  but through the MEM_LOAD*() set of macros. Note that this are only
  used when compiling with --disable-hexagon-idef-parser.

  In this case, we also take this opportunity to simplify the code,
  unifying the mem_load*() functions.

Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com>
---
 target/hexagon/macros.h    | 22 ++++++++++-------
 target/hexagon/op_helper.h | 11 ++-------  target/hexagon/op_helper.c | 49
+++++++-------------------------------
 3 files changed, 25 insertions(+), 57 deletions(-)

diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index
5451b061ee..efb8013912 100644
--- a/target/hexagon/macros.h
+++ b/target/hexagon/macros.h
+
+#define MEM_LOADn(SIZE, VA) ({ \
+    check_noshuf(env, pkt_has_store_s1, slot, VA, SIZE); \
+    cpu_ldub_data_ra(env, VA, GETPC()); \
+})

Note that check_noshuf calls HELPER(probe_noshuf_load) and
HELPER(commit_store).  Both of those call GETPC() from within.  So, you'll
need to pull the contents into separate functions that take ra as an
argument.

Does this pass the test suite?  You are only using the SIZE parameter in
check_noshuf, but cpu_ldub_data_ra only reads a single byte.

+#define MEM_LOAD1s(VA) ((int8_t)MEM_LOADn(1, VA)) #define 
+MEM_LOAD1u(VA) ((uint8_t)MEM_LOADn(1, VA)) #define MEM_LOAD2s(VA) 
+((int16_t)MEM_LOADn(2, VA)) #define MEM_LOAD2u(VA) 
+((uint16_t)MEM_LOADn(2, VA)) #define MEM_LOAD4s(VA) 
+((int32_t)MEM_LOADn(4, VA)) #define MEM_LOAD4u(VA) 
+((uint32_t)MEM_LOADn(4, VA)) #define MEM_LOAD8s(VA) 
+((int64_t)MEM_LOADn(8, VA)) #define MEM_LOAD8u(VA) 
+((uint64_t)MEM_LOADn(8, VA))

A further cleanup would be to remove these altogether and modify the
definition of fLOAD to use MEM_LOADn.

 
 #define MEM_STORE1(VA, DATA, SLOT) log_store32(env, VA, DATA, 1, SLOT)
#define MEM_STORE2(VA, DATA, SLOT) log_store32(env, VA, DATA, 2, SLOT) diff
--git a/target/hexagon/op_helper.h b/target/hexagon/op_helper.h index
8f3764d15e..845c3d197e 100644
--- a/target/hexagon/op_helper.h
+++ b/target/hexagon/op_helper.h
@@ -19,15 +19,8 @@
 #define HEXAGON_OP_HELPER_H
 
+void check_noshuf(CPUHexagonState *env, bool pkt_has_store_s1,
+                  uint32_t slot, target_ulong vaddr, int size);

Does this really need to be non-static?


Thanks,
Taylor
Matheus Tavares Bernardino July 5, 2023, 12:45 p.m. UTC | #4
> Taylor <ltaylorsimpson@gmail.com> wrote:
>
> > Matheus Tavares Bernardino <quic_mathbern@quicinc.com> wrote:
> >
> > diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index
> > 5451b061ee..efb8013912 100644
> > --- a/target/hexagon/macros.h
> > +++ b/target/hexagon/macros.h
> > +
> > +#define MEM_LOADn(SIZE, VA) ({ \
> > +    check_noshuf(env, pkt_has_store_s1, slot, VA, SIZE); \
> > +    cpu_ldub_data_ra(env, VA, GETPC()); \
> > +})
> 
> Note that check_noshuf calls HELPER(probe_noshuf_load) and
> HELPER(commit_store).  Both of those call GETPC() from within.  So, you'll
> need to pull the contents into separate functions that take ra as an
> argument.

Ah, good point. It was my understanding that, in case of a memory
exception in one of those nested helper calls, the GETPC() we would want
to use for unwinding was the one from the most recent helper. I'm still
trying to wrap my head around these concepts, though, so I might have
misunderstood it. Is this not the case?

> Does this pass the test suite?  You are only using the SIZE parameter in
> check_noshuf, but cpu_ldub_data_ra only reads a single byte.

My oversight, this has to be fixed, thanks.
Richard Henderson July 5, 2023, 1:46 p.m. UTC | #5
On 7/5/23 14:45, Matheus Tavares Bernardino wrote:
>> Taylor <ltaylorsimpson@gmail.com> wrote:
>> Note that check_noshuf calls HELPER(probe_noshuf_load) and
>> HELPER(commit_store).  Both of those call GETPC() from within.  So, you'll
>> need to pull the contents into separate functions that take ra as an
>> argument.
> 
> Ah, good point. It was my understanding that, in case of a memory
> exception in one of those nested helper calls, the GETPC() we would want
> to use for unwinding was the one from the most recent helper. I'm still
> trying to wrap my head around these concepts, though, so I might have
> misunderstood it. Is this not the case?

No, it is not the case.

GETPC fetches the return address from the current function.

The unwinder which uses this value needs the return address *into* the generated code.

Therefore the HELPER function that is directly called from generated code is the place at 
which GETPC must be used, and nowhere else.  The corollary is that one HELPER should avoid 
calling another HELPER, Just In Case -- use separate intermediate functions instead, as 
Taylor suggests above.


r~
diff mbox series

Patch

diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h
index 5451b061ee..efb8013912 100644
--- a/target/hexagon/macros.h
+++ b/target/hexagon/macros.h
@@ -173,14 +173,20 @@ 
 #define MEM_STORE8(VA, DATA, SLOT) \
     MEM_STORE8_FUNC(DATA)(cpu_env, VA, DATA, SLOT)
 #else
-#define MEM_LOAD1s(VA) ((int8_t)mem_load1(env, pkt_has_store_s1, slot, VA))
-#define MEM_LOAD1u(VA) ((uint8_t)mem_load1(env, pkt_has_store_s1, slot, VA))
-#define MEM_LOAD2s(VA) ((int16_t)mem_load2(env, pkt_has_store_s1, slot, VA))
-#define MEM_LOAD2u(VA) ((uint16_t)mem_load2(env, pkt_has_store_s1, slot, VA))
-#define MEM_LOAD4s(VA) ((int32_t)mem_load4(env, pkt_has_store_s1, slot, VA))
-#define MEM_LOAD4u(VA) ((uint32_t)mem_load4(env, pkt_has_store_s1, slot, VA))
-#define MEM_LOAD8s(VA) ((int64_t)mem_load8(env, pkt_has_store_s1, slot, VA))
-#define MEM_LOAD8u(VA) ((uint64_t)mem_load8(env, pkt_has_store_s1, slot, VA))
+
+#define MEM_LOADn(SIZE, VA) ({ \
+    check_noshuf(env, pkt_has_store_s1, slot, VA, SIZE); \
+    cpu_ldub_data_ra(env, VA, GETPC()); \
+})
+
+#define MEM_LOAD1s(VA) ((int8_t)MEM_LOADn(1, VA))
+#define MEM_LOAD1u(VA) ((uint8_t)MEM_LOADn(1, VA))
+#define MEM_LOAD2s(VA) ((int16_t)MEM_LOADn(2, VA))
+#define MEM_LOAD2u(VA) ((uint16_t)MEM_LOADn(2, VA))
+#define MEM_LOAD4s(VA) ((int32_t)MEM_LOADn(4, VA))
+#define MEM_LOAD4u(VA) ((uint32_t)MEM_LOADn(4, VA))
+#define MEM_LOAD8s(VA) ((int64_t)MEM_LOADn(8, VA))
+#define MEM_LOAD8u(VA) ((uint64_t)MEM_LOADn(8, VA))
 
 #define MEM_STORE1(VA, DATA, SLOT) log_store32(env, VA, DATA, 1, SLOT)
 #define MEM_STORE2(VA, DATA, SLOT) log_store32(env, VA, DATA, 2, SLOT)
diff --git a/target/hexagon/op_helper.h b/target/hexagon/op_helper.h
index 8f3764d15e..845c3d197e 100644
--- a/target/hexagon/op_helper.h
+++ b/target/hexagon/op_helper.h
@@ -19,15 +19,8 @@ 
 #define HEXAGON_OP_HELPER_H
 
 /* Misc functions */
-uint8_t mem_load1(CPUHexagonState *env, bool pkt_has_store_s1,
-                  uint32_t slot, target_ulong vaddr);
-uint16_t mem_load2(CPUHexagonState *env, bool pkt_has_store_s1,
-                   uint32_t slot, target_ulong vaddr);
-uint32_t mem_load4(CPUHexagonState *env, bool pkt_has_store_s1,
-                   uint32_t slot, target_ulong vaddr);
-uint64_t mem_load8(CPUHexagonState *env, bool pkt_has_store_s1,
-                   uint32_t slot, target_ulong vaddr);
-
+void check_noshuf(CPUHexagonState *env, bool pkt_has_store_s1,
+                  uint32_t slot, target_ulong vaddr, int size);
 void log_store64(CPUHexagonState *env, target_ulong addr,
                  int64_t val, int width, int slot);
 void log_store32(CPUHexagonState *env, target_ulong addr,
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index 12967ac21e..1bc9c7fc2e 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -467,13 +467,12 @@  int32_t HELPER(cabacdecbin_pred)(int64_t RssV, int64_t RttV)
 }
 
 static void probe_store(CPUHexagonState *env, int slot, int mmu_idx,
-                        bool is_predicated)
+                        bool is_predicated, uintptr_t retaddr)
 {
     if (!is_predicated || !(env->slot_cancelled & (1 << slot))) {
         size1u_t width = env->mem_log_stores[slot].width;
         target_ulong va = env->mem_log_stores[slot].va;
-        uintptr_t ra = GETPC();
-        probe_write(env, va, width, mmu_idx, ra);
+        probe_write(env, va, width, mmu_idx, retaddr);
     }
 }
 
@@ -494,7 +493,8 @@  void HELPER(probe_pkt_scalar_store_s0)(CPUHexagonState *env, int args)
     int mmu_idx = FIELD_EX32(args, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX);
     bool is_predicated =
         FIELD_EX32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED);
-    probe_store(env, 0, mmu_idx, is_predicated);
+    uintptr_t ra = GETPC();
+    probe_store(env, 0, mmu_idx, is_predicated, ra);
 }
 
 void HELPER(probe_hvx_stores)(CPUHexagonState *env, int mmu_idx)
@@ -547,12 +547,13 @@  void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask)
     bool s0_is_pred = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, S0_IS_PRED);
     bool s1_is_pred = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, S1_IS_PRED);
     int mmu_idx = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, MMU_IDX);
+    uintptr_t ra = GETPC();
 
     if (has_st0) {
-        probe_store(env, 0, mmu_idx, s0_is_pred);
+        probe_store(env, 0, mmu_idx, s0_is_pred, ra);
     }
     if (has_st1) {
-        probe_store(env, 1, mmu_idx, s1_is_pred);
+        probe_store(env, 1, mmu_idx, s1_is_pred, ra);
     }
     if (has_hvx_stores) {
         HELPER(probe_hvx_stores)(env, mmu_idx);
@@ -566,8 +567,8 @@  void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask)
  * If the load is in slot 0 and there is a store in slot1 (that
  * wasn't cancelled), we have to do the store first.
  */
-static void check_noshuf(CPUHexagonState *env, bool pkt_has_store_s1,
-                         uint32_t slot, target_ulong vaddr, int size)
+void check_noshuf(CPUHexagonState *env, bool pkt_has_store_s1,
+                  uint32_t slot, target_ulong vaddr, int size)
 {
     if (slot == 0 && pkt_has_store_s1 &&
         ((env->slot_cancelled & (1 << 1)) == 0)) {
@@ -576,38 +577,6 @@  static void check_noshuf(CPUHexagonState *env, bool pkt_has_store_s1,
     }
 }
 
-uint8_t mem_load1(CPUHexagonState *env, bool pkt_has_store_s1,
-                  uint32_t slot, target_ulong vaddr)
-{
-    uintptr_t ra = GETPC();
-    check_noshuf(env, pkt_has_store_s1, slot, vaddr, 1);
-    return cpu_ldub_data_ra(env, vaddr, ra);
-}
-
-uint16_t mem_load2(CPUHexagonState *env, bool pkt_has_store_s1,
-                   uint32_t slot, target_ulong vaddr)
-{
-    uintptr_t ra = GETPC();
-    check_noshuf(env, pkt_has_store_s1, slot, vaddr, 2);
-    return cpu_lduw_data_ra(env, vaddr, ra);
-}
-
-uint32_t mem_load4(CPUHexagonState *env, bool pkt_has_store_s1,
-                   uint32_t slot, target_ulong vaddr)
-{
-    uintptr_t ra = GETPC();
-    check_noshuf(env, pkt_has_store_s1, slot, vaddr, 4);
-    return cpu_ldl_data_ra(env, vaddr, ra);
-}
-
-uint64_t mem_load8(CPUHexagonState *env, bool pkt_has_store_s1,
-                   uint32_t slot, target_ulong vaddr)
-{
-    uintptr_t ra = GETPC();
-    check_noshuf(env, pkt_has_store_s1, slot, vaddr, 8);
-    return cpu_ldq_data_ra(env, vaddr, ra);
-}
-
 /* Floating point */
 float64 HELPER(conv_sf2df)(CPUHexagonState *env, float32 RsV)
 {