diff mbox series

[v1,5/6] linux-user/loongarch64: Add LSX sigcontext save/restore

Message ID 20231010033701.385725-6-gaosong@loongson.cn
State New
Headers show
Series linux-user/loongarch64: Add LSX/LASX sigcontext | expand

Commit Message

Song Gao Oct. 10, 2023, 3:37 a.m. UTC
Signed-off-by: Song Gao <gaosong@loongson.cn>
---
 linux-user/loongarch64/signal.c | 107 ++++++++++++++++++++++++++------
 1 file changed, 87 insertions(+), 20 deletions(-)

Comments

Richard Henderson Oct. 28, 2023, 9:35 p.m. UTC | #1
On 10/9/23 20:37, Song Gao wrote:
> Signed-off-by: Song Gao <gaosong@loongson.cn>
> ---
>   linux-user/loongarch64/signal.c | 107 ++++++++++++++++++++++++++------
>   1 file changed, 87 insertions(+), 20 deletions(-)
> 
> diff --git a/linux-user/loongarch64/signal.c b/linux-user/loongarch64/signal.c
> index 277e9f5757..4b09e50a5f 100644
> --- a/linux-user/loongarch64/signal.c
> +++ b/linux-user/loongarch64/signal.c
> @@ -33,6 +33,14 @@ struct target_fpu_context {
>       uint32_t fcsr;
>   } QEMU_ALIGNED(FPU_CTX_ALIGN);
>   
> +#define LSX_CTX_MAGIC           0x53580001
> +#define LSX_CTX_ALIGN           16
> +struct target_lsx_context {
> +    uint64_t regs[2 * 32];
> +    uint64_t fcc;
> +    uint32_t fcsr;
> +} QEMU_ALIGNED(LSX_CTX_ALIGN);

It probably doesn't matter here because fo the alignment, but all types within target 
structures should be using abi_{ullong,uint} etc.


> @@ -99,8 +109,15 @@ static abi_ptr setup_extcontext(struct extctx_layout *extctx, abi_ptr sp)
>   
>       /* For qemu, there is no lazy fp context switch, so fp always present. */
>       extctx->flags = SC_USED_FP;
> -    sp = extframe_alloc(extctx, &extctx->fpu,
> +
> +    if (env->extctx_flags & EXTCTX_FLAGS_LSX) {
> +        sp = extframe_alloc(extctx, &extctx->lsx,
> +                        sizeof(struct target_lsx_context), LSX_CTX_ALIGN, sp);
> +
> +    } else if (env->extctx_flags & EXTCTX_FLAGS_FPU) {
> +        sp = extframe_alloc(extctx, &extctx->fpu,
>                           sizeof(struct target_fpu_context), FPU_CTX_ALIGN, sp);
> +    }

I think this is overly complicated.  (1) The fpu is always present, and (2) you don't need 
a special flag on env, you can check the same CSR bits as for system mode.

I'll note that while this layout matches the kernel, it is an unfortunate set of data 
structures.  Any program has to look for all of {FPU,LSX,LASX}_CTX_MAGIC in order to find 
the basic fp registers.


r~
Song Gao Oct. 30, 2023, 3:28 a.m. UTC | #2
在 2023/10/29 上午5:35, Richard Henderson 写道:
> On 10/9/23 20:37, Song Gao wrote:
>> Signed-off-by: Song Gao <gaosong@loongson.cn>
>> ---
>>   linux-user/loongarch64/signal.c | 107 ++++++++++++++++++++++++++------
>>   1 file changed, 87 insertions(+), 20 deletions(-)
>>
>> diff --git a/linux-user/loongarch64/signal.c 
>> b/linux-user/loongarch64/signal.c
>> index 277e9f5757..4b09e50a5f 100644
>> --- a/linux-user/loongarch64/signal.c
>> +++ b/linux-user/loongarch64/signal.c
>> @@ -33,6 +33,14 @@ struct target_fpu_context {
>>       uint32_t fcsr;
>>   } QEMU_ALIGNED(FPU_CTX_ALIGN);
>>   +#define LSX_CTX_MAGIC           0x53580001
>> +#define LSX_CTX_ALIGN           16
>> +struct target_lsx_context {
>> +    uint64_t regs[2 * 32];
>> +    uint64_t fcc;
>> +    uint32_t fcsr;
>> +} QEMU_ALIGNED(LSX_CTX_ALIGN);
>
> It probably doesn't matter here because fo the alignment, but all 
> types within target structures should be using abi_{ullong,uint} etc.
>
>
Ok,
>> @@ -99,8 +109,15 @@ static abi_ptr setup_extcontext(struct 
>> extctx_layout *extctx, abi_ptr sp)
>>         /* For qemu, there is no lazy fp context switch, so fp always 
>> present. */
>>       extctx->flags = SC_USED_FP;
>> -    sp = extframe_alloc(extctx, &extctx->fpu,
>> +
>> +    if (env->extctx_flags & EXTCTX_FLAGS_LSX) {
>> +        sp = extframe_alloc(extctx, &extctx->lsx,
>> +                        sizeof(struct target_lsx_context), 
>> LSX_CTX_ALIGN, sp);
>> +
>> +    } else if (env->extctx_flags & EXTCTX_FLAGS_FPU) {
>> +        sp = extframe_alloc(extctx, &extctx->fpu,
>>                           sizeof(struct target_fpu_context), 
>> FPU_CTX_ALIGN, sp);
>> +    }
>
> I think this is overly complicated.  (1) The fpu is always present, 
> and (2) you don't need a special flag on env, you can check the same 
> CSR bits as for system mode.
>
I think extctx_flags is incorrectly named, fp_alive_flags or 
vec_alive_flags would be more appropriate.
The flags function like the kernel's 'thread_lsx_context_live', 
'thread_lasx_context_live' functions, checking if the LSX/LASX 
instructions are used.
If we don't use the LSX/LASX instructions, we don't need to use 
lsx_context/lasx_context even though the LSX/LASX enable bit is set.

and  EXTCTX_FLAGS_FPU is not required.

Thanks.
Song Gao
> I'll note that while this layout matches the kernel, it is an 
> unfortunate set of data structures.  Any program has to look for all 
> of {FPU,LSX,LASX}_CTX_MAGIC in order to find the basic fp registers.
>
>
> r~
diff mbox series

Patch

diff --git a/linux-user/loongarch64/signal.c b/linux-user/loongarch64/signal.c
index 277e9f5757..4b09e50a5f 100644
--- a/linux-user/loongarch64/signal.c
+++ b/linux-user/loongarch64/signal.c
@@ -33,6 +33,14 @@  struct target_fpu_context {
     uint32_t fcsr;
 } QEMU_ALIGNED(FPU_CTX_ALIGN);
 
+#define LSX_CTX_MAGIC           0x53580001
+#define LSX_CTX_ALIGN           16
+struct target_lsx_context {
+    uint64_t regs[2 * 32];
+    uint64_t fcc;
+    uint32_t fcsr;
+} QEMU_ALIGNED(LSX_CTX_ALIGN);
+
 #define CONTEXT_INFO_ALIGN      16
 struct target_sctx_info {
     uint32_t magic;
@@ -66,9 +74,10 @@  struct ctx_layout {
 };
 
 struct extctx_layout {
-    unsigned int size;
+    unsigned long size;
     unsigned int flags;
     struct ctx_layout fpu;
+    struct ctx_layout lsx;
     struct ctx_layout end;
 };
 
@@ -90,7 +99,8 @@  static abi_ptr extframe_alloc(struct extctx_layout *extctx,
     return sp;
 }
 
-static abi_ptr setup_extcontext(struct extctx_layout *extctx, abi_ptr sp)
+static abi_ptr setup_extcontext(CPULoongArchState *env,
+                                struct extctx_layout *extctx, abi_ptr sp)
 {
     memset(extctx, 0, sizeof(struct extctx_layout));
 
@@ -99,8 +109,15 @@  static abi_ptr setup_extcontext(struct extctx_layout *extctx, abi_ptr sp)
 
     /* For qemu, there is no lazy fp context switch, so fp always present. */
     extctx->flags = SC_USED_FP;
-    sp = extframe_alloc(extctx, &extctx->fpu,
+
+    if (env->extctx_flags & EXTCTX_FLAGS_LSX) {
+        sp = extframe_alloc(extctx, &extctx->lsx,
+                        sizeof(struct target_lsx_context), LSX_CTX_ALIGN, sp);
+
+    } else if (env->extctx_flags & EXTCTX_FLAGS_FPU) {
+        sp = extframe_alloc(extctx, &extctx->fpu,
                         sizeof(struct target_fpu_context), FPU_CTX_ALIGN, sp);
+    }
 
     return sp;
 }
@@ -110,7 +127,6 @@  static void setup_sigframe(CPULoongArchState *env,
                            struct extctx_layout *extctx)
 {
     struct target_sctx_info *info;
-    struct target_fpu_context *fpu_ctx;
     int i;
 
     __put_user(extctx->flags, &sc->sc_flags);
@@ -121,18 +137,39 @@  static void setup_sigframe(CPULoongArchState *env,
     }
 
     /*
-     * Set fpu context
+     * Set extension context
      */
-    info = extctx->fpu.haddr;
-    __put_user(FPU_CTX_MAGIC, &info->magic);
-    __put_user(extctx->fpu.size, &info->size);
 
-    fpu_ctx = (struct target_fpu_context *)(info + 1);
-    for (i = 0; i < 32; ++i) {
-        __put_user(env->fpr[i].vreg.D(0), &fpu_ctx->regs[i]);
+    if (env->extctx_flags & EXTCTX_FLAGS_LSX) {
+        struct target_lsx_context *lsx_ctx;
+        info = extctx->lsx.haddr;
+
+        __put_user(LSX_CTX_MAGIC, &info->magic);
+        __put_user(extctx->lsx.size, &info->size);
+
+        lsx_ctx = (struct target_lsx_context *)(info + 1);
+
+        for (i = 0; i < 32; ++i) {
+            __put_user(env->fpr[i].vreg.UD(0), &lsx_ctx->regs[2 * i]);
+            __put_user(env->fpr[i].vreg.UD(1), &lsx_ctx->regs[2 * i + 1]);
+        }
+        __put_user(read_fcc(env), &lsx_ctx->fcc);
+        __put_user(env->fcsr0, &lsx_ctx->fcsr);
+    } else if (env->extctx_flags & EXTCTX_FLAGS_FPU) {
+        struct target_fpu_context *fpu_ctx;
+        info = extctx->fpu.haddr;
+
+        __put_user(FPU_CTX_MAGIC, &info->magic);
+        __put_user(extctx->fpu.size, &info->size);
+
+        fpu_ctx = (struct target_fpu_context *)(info + 1);
+
+        for (i = 0; i < 32; ++i) {
+            __put_user(env->fpr[i].vreg.UD(0), &fpu_ctx->regs[i]);
+        }
+        __put_user(read_fcc(env), &fpu_ctx->fcc);
+        __put_user(env->fcsr0, &fpu_ctx->fcsr);
     }
-    __put_user(read_fcc(env), &fpu_ctx->fcc);
-    __put_user(env->fcsr0, &fpu_ctx->fcsr);
 
     /*
      * Set end context
@@ -169,6 +206,15 @@  static bool parse_extcontext(struct extctx_layout *extctx, abi_ptr frame)
             extctx->fpu.size = size;
             extctx->size += size;
             break;
+        case LSX_CTX_MAGIC:
+            if (size < (sizeof(struct target_sctx_info) +
+                        sizeof(struct target_lsx_context))) {
+                return false;
+            }
+            extctx->lsx.gaddr = frame;
+            extctx->lsx.size = size;
+            extctx->size += size;
+            break;
         default:
             return false;
         }
@@ -182,19 +228,31 @@  static void restore_sigframe(CPULoongArchState *env,
                              struct extctx_layout *extctx)
 {
     int i;
+    uint64_t fcc;
 
     __get_user(env->pc, &sc->sc_pc);
     for (i = 1; i < 32; ++i) {
         __get_user(env->gpr[i], &sc->sc_regs[i]);
     }
 
-    if (extctx->fpu.haddr) {
+    if (extctx->lsx.haddr) {
+        struct target_lsx_context *lsx_ctx =
+            extctx->lsx.haddr + sizeof(struct target_sctx_info);
+
+        for (i = 0; i < 32; ++i) {
+            __get_user(env->fpr[i].vreg.UD(0), &lsx_ctx->regs[2 * i]);
+            __get_user(env->fpr[i].vreg.UD(1), &lsx_ctx->regs[2 * i + 1]);
+        }
+        __get_user(fcc, &lsx_ctx->fcc);
+        write_fcc(env, fcc);
+        __get_user(env->fcsr0, &lsx_ctx->fcsr);
+        restore_fp_status(env);
+    } else if (extctx->fpu.haddr) {
         struct target_fpu_context *fpu_ctx =
             extctx->fpu.haddr + sizeof(struct target_sctx_info);
-        uint64_t fcc;
 
         for (i = 0; i < 32; ++i) {
-            __get_user(env->fpr[i].vreg.D(0), &fpu_ctx->regs[i]);
+            __get_user(env->fpr[i].vreg.UD(0), &fpu_ctx->regs[i]);
         }
         __get_user(fcc, &fpu_ctx->fcc);
         write_fcc(env, fcc);
@@ -214,7 +272,7 @@  static abi_ptr get_sigframe(struct target_sigaction *ka,
 
     sp = target_sigsp(get_sp_from_cpustate(env), ka);
     sp = ROUND_DOWN(sp, 16);
-    sp = setup_extcontext(extctx, sp);
+    sp = setup_extcontext(env, extctx, sp);
     sp -= sizeof(struct target_rt_sigframe);
 
     assert(QEMU_IS_ALIGNED(sp, 16));
@@ -240,8 +298,14 @@  void setup_rt_frame(int sig, struct target_sigaction *ka,
         force_sigsegv(sig);
         return;
     }
-    extctx.fpu.haddr = (void *)frame + (extctx.fpu.gaddr - frame_addr);
-    extctx.end.haddr = (void *)frame + (extctx.end.gaddr - frame_addr);
+
+    if (env->extctx_flags & EXTCTX_FLAGS_LSX) {
+        extctx.lsx.haddr = (void *)frame + (extctx.lsx.gaddr - frame_addr);
+        extctx.end.haddr = (void *)frame + (extctx.end.gaddr - frame_addr);
+    } else if (env->extctx_flags & EXTCTX_FLAGS_FPU) {
+        extctx.fpu.haddr = (void *)frame + (extctx.fpu.gaddr - frame_addr);
+        extctx.end.haddr = (void *)frame + (extctx.end.gaddr - frame_addr);
+    }
 
     tswap_siginfo(&frame->rs_info, info);
 
@@ -284,7 +348,10 @@  long do_rt_sigreturn(CPULoongArchState *env)
     if (!frame) {
         goto badframe;
     }
-    if (extctx.fpu.gaddr) {
+
+    if (extctx.lsx.gaddr) {
+        extctx.lsx.haddr = (void *)frame + (extctx.lsx.gaddr - frame_addr);
+    } else if (extctx.fpu.gaddr) {
         extctx.fpu.haddr = (void *)frame + (extctx.fpu.gaddr - frame_addr);
     }