diff mbox series

[v2,3/4] target/ppc: Make checkstop actually stop the system

Message ID 20230627134644.260663-4-npiggin@gmail.com
State New
Headers show
Series target/ppc: Catch invalid real address accesses | expand

Commit Message

Nicholas Piggin June 27, 2023, 1:46 p.m. UTC
checkstop state does not halt the system, interrupts continue to be
serviced, and other CPUs run.

Stop the machine with vm_stop(), and print a register dump too.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
Since v1:
- Fix loop exit so it stops on the attn instruction, rather than
  after it.

 target/ppc/excp_helper.c | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

Comments

BALATON Zoltan June 27, 2023, 5:38 p.m. UTC | #1
On Tue, 27 Jun 2023, Nicholas Piggin wrote:
> checkstop state does not halt the system, interrupts continue to be
> serviced, and other CPUs run.
>
> Stop the machine with vm_stop(), and print a register dump too.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> Since v1:
> - Fix loop exit so it stops on the attn instruction, rather than
>  after it.
>
> target/ppc/excp_helper.c | 34 ++++++++++++++++++++--------------
> 1 file changed, 20 insertions(+), 14 deletions(-)
>
> diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
> index 5beda973ce..28d8a9b212 100644
> --- a/target/ppc/excp_helper.c
> +++ b/target/ppc/excp_helper.c
> @@ -19,6 +19,7 @@
> #include "qemu/osdep.h"
> #include "qemu/main-loop.h"
> #include "qemu/log.h"
> +#include "sysemu/runstate.h"
> #include "cpu.h"
> #include "exec/exec-all.h"
> #include "internal.h"
> @@ -186,19 +187,24 @@ static void ppc_excp_debug_sw_tlb(CPUPPCState *env, int excp)
>              env->error_code);
> }
>
> -static void powerpc_checkstop(CPUPPCState *env)
> +static void powerpc_checkstop(CPUPPCState *env, const char *reason)
> {
>     CPUState *cs = env_cpu(env);
>
> -    /* Machine check exception is not enabled. Enter checkstop state. */
> -    fprintf(stderr, "Machine check while not allowed. "
> -            "Entering checkstop state\n");
> +    vm_stop(RUN_STATE_GUEST_PANICKED);
> +
> +    fprintf(stderr, "Entering checkstop state: %s\n", reason);
> +    cpu_dump_state(cs, stderr, CPU_DUMP_FPU | CPU_DUMP_CCOP);
>     if (qemu_log_separate()) {
> -        qemu_log("Machine check while not allowed. "
> -                 "Entering checkstop state\n");
> +        FILE *logfile = qemu_log_trylock();
> +        if (logfile) {
> +            fprintf(logfile, "Entering checkstop state: %s\n", reason);

I don't think you should have fprintfs here. Is this remnants of debug 
code left here by mistake? The fprintf that was there before may also need 
to be converted to some qemI_log or error_report but I did not know what 
these are for and did not address that. But if you want to add more then 
it may need to be solved first.

> +            cpu_dump_state(cs, logfile, CPU_DUMP_FPU | CPU_DUMP_CCOP);
> +            qemu_log_unlock(logfile);
> +        }
>     }
> -    cs->halted = 1;
> -    cpu_interrupt_exittb(cs);
> +

Excess blank line?

> +    cpu_loop_exit_noexc(cs);
> }
>
> #if defined(TARGET_PPC64)
> @@ -483,7 +489,7 @@ static void powerpc_excp_40x(PowerPCCPU *cpu, int excp)
>         break;
>     case POWERPC_EXCP_MCHECK:    /* Machine check exception                  */
>         if (!FIELD_EX64(env->msr, MSR, ME)) {
> -            powerpc_checkstop(env);
> +            powerpc_checkstop(env, "machine check with MSR[ME]=0");

If the message is always the same why pass it from here If the only other 
option not used yet would be MSR[ME]=1 then that could also be checked in 
the func so no need to pass the message. So is there any other possible 
reason here?

Regards,
BALATON Zoltan

>         }
>         /* machine check exceptions don't have ME set */
>         new_msr &= ~((target_ulong)1 << MSR_ME);
> @@ -602,7 +608,7 @@ static void powerpc_excp_6xx(PowerPCCPU *cpu, int excp)
>         break;
>     case POWERPC_EXCP_MCHECK:    /* Machine check exception                  */
>         if (!FIELD_EX64(env->msr, MSR, ME)) {
> -            powerpc_checkstop(env);
> +            powerpc_checkstop(env, "machine check with MSR[ME]=0");
>         }
>         /* machine check exceptions don't have ME set */
>         new_msr &= ~((target_ulong)1 << MSR_ME);
> @@ -763,7 +769,7 @@ static void powerpc_excp_7xx(PowerPCCPU *cpu, int excp)
>     switch (excp) {
>     case POWERPC_EXCP_MCHECK:    /* Machine check exception                  */
>         if (!FIELD_EX64(env->msr, MSR, ME)) {
> -            powerpc_checkstop(env);
> +            powerpc_checkstop(env, "machine check with MSR[ME]=0");
>         }
>         /* machine check exceptions don't have ME set */
>         new_msr &= ~((target_ulong)1 << MSR_ME);
> @@ -936,7 +942,7 @@ static void powerpc_excp_74xx(PowerPCCPU *cpu, int excp)
>     switch (excp) {
>     case POWERPC_EXCP_MCHECK:    /* Machine check exception                  */
>         if (!FIELD_EX64(env->msr, MSR, ME)) {
> -            powerpc_checkstop(env);
> +            powerpc_checkstop(env, "machine check with MSR[ME]=0");
>         }
>         /* machine check exceptions don't have ME set */
>         new_msr &= ~((target_ulong)1 << MSR_ME);
> @@ -1119,7 +1125,7 @@ static void powerpc_excp_booke(PowerPCCPU *cpu, int excp)
>         break;
>     case POWERPC_EXCP_MCHECK:    /* Machine check exception                  */
>         if (!FIELD_EX64(env->msr, MSR, ME)) {
> -            powerpc_checkstop(env);
> +            powerpc_checkstop(env, "machine check with MSR[ME]=0");
>         }
>         /* machine check exceptions don't have ME set */
>         new_msr &= ~((target_ulong)1 << MSR_ME);
> @@ -1424,7 +1430,7 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp)
>     switch (excp) {
>     case POWERPC_EXCP_MCHECK:    /* Machine check exception                  */
>         if (!FIELD_EX64(env->msr, MSR, ME)) {
> -            powerpc_checkstop(env);
> +            powerpc_checkstop(env, "machine check with MSR[ME]=0");
>         }
>         if (env->msr_mask & MSR_HVB) {
>             /*
>
Nicholas Piggin June 28, 2023, 12:55 a.m. UTC | #2
On Wed Jun 28, 2023 at 3:38 AM AEST, BALATON Zoltan wrote:
> On Tue, 27 Jun 2023, Nicholas Piggin wrote:
> > checkstop state does not halt the system, interrupts continue to be
> > serviced, and other CPUs run.
> >
> > Stop the machine with vm_stop(), and print a register dump too.
> >
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> > ---
> > Since v1:
> > - Fix loop exit so it stops on the attn instruction, rather than
> >  after it.
> >
> > target/ppc/excp_helper.c | 34 ++++++++++++++++++++--------------
> > 1 file changed, 20 insertions(+), 14 deletions(-)
> >
> > diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
> > index 5beda973ce..28d8a9b212 100644
> > --- a/target/ppc/excp_helper.c
> > +++ b/target/ppc/excp_helper.c
> > @@ -19,6 +19,7 @@
> > #include "qemu/osdep.h"
> > #include "qemu/main-loop.h"
> > #include "qemu/log.h"
> > +#include "sysemu/runstate.h"
> > #include "cpu.h"
> > #include "exec/exec-all.h"
> > #include "internal.h"
> > @@ -186,19 +187,24 @@ static void ppc_excp_debug_sw_tlb(CPUPPCState *env, int excp)
> >              env->error_code);
> > }
> >
> > -static void powerpc_checkstop(CPUPPCState *env)
> > +static void powerpc_checkstop(CPUPPCState *env, const char *reason)
> > {
> >     CPUState *cs = env_cpu(env);
> >
> > -    /* Machine check exception is not enabled. Enter checkstop state. */
> > -    fprintf(stderr, "Machine check while not allowed. "
> > -            "Entering checkstop state\n");
> > +    vm_stop(RUN_STATE_GUEST_PANICKED);
> > +
> > +    fprintf(stderr, "Entering checkstop state: %s\n", reason);
> > +    cpu_dump_state(cs, stderr, CPU_DUMP_FPU | CPU_DUMP_CCOP);
> >     if (qemu_log_separate()) {
> > -        qemu_log("Machine check while not allowed. "
> > -                 "Entering checkstop state\n");
> > +        FILE *logfile = qemu_log_trylock();
> > +        if (logfile) {
> > +            fprintf(logfile, "Entering checkstop state: %s\n", reason);
>
> I don't think you should have fprintfs here. Is this remnants of debug 
> code left here by mistake? The fprintf that was there before may also need 
> to be converted to some qemI_log or error_report but I did not know what 
> these are for and did not address that. But if you want to add more then 
> it may need to be solved first.

I just followed existing fprintf use. Changing that should be separate
patch indeed.

> > +            cpu_dump_state(cs, logfile, CPU_DUMP_FPU | CPU_DUMP_CCOP);
> > +            qemu_log_unlock(logfile);
> > +        }
> >     }
> > -    cs->halted = 1;
> > -    cpu_interrupt_exittb(cs);
> > +
>
> Excess blank line?

No, it separates the logging block from function.

>
> > +    cpu_loop_exit_noexc(cs);
> > }
> >
> > #if defined(TARGET_PPC64)
> > @@ -483,7 +489,7 @@ static void powerpc_excp_40x(PowerPCCPU *cpu, int excp)
> >         break;
> >     case POWERPC_EXCP_MCHECK:    /* Machine check exception                  */
> >         if (!FIELD_EX64(env->msr, MSR, ME)) {
> > -            powerpc_checkstop(env);
> > +            powerpc_checkstop(env, "machine check with MSR[ME]=0");
>
> If the message is always the same why pass it from here If the only other 
> option not used yet would be MSR[ME]=1 then that could also be checked in 
> the func so no need to pass the message. So is there any other possible 
> reason here?

To make the checkstop function more general (e.g., used by the next patch).

Thanks,
Nick
BALATON Zoltan June 28, 2023, 1:28 a.m. UTC | #3
On Wed, 28 Jun 2023, Nicholas Piggin wrote:
> On Wed Jun 28, 2023 at 3:38 AM AEST, BALATON Zoltan wrote:
>> On Tue, 27 Jun 2023, Nicholas Piggin wrote:
>>> checkstop state does not halt the system, interrupts continue to be
>>> serviced, and other CPUs run.
>>>
>>> Stop the machine with vm_stop(), and print a register dump too.
>>>
>>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>>> ---
>>> Since v1:
>>> - Fix loop exit so it stops on the attn instruction, rather than
>>>  after it.
>>>
>>> target/ppc/excp_helper.c | 34 ++++++++++++++++++++--------------
>>> 1 file changed, 20 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
>>> index 5beda973ce..28d8a9b212 100644
>>> --- a/target/ppc/excp_helper.c
>>> +++ b/target/ppc/excp_helper.c
>>> @@ -19,6 +19,7 @@
>>> #include "qemu/osdep.h"
>>> #include "qemu/main-loop.h"
>>> #include "qemu/log.h"
>>> +#include "sysemu/runstate.h"
>>> #include "cpu.h"
>>> #include "exec/exec-all.h"
>>> #include "internal.h"
>>> @@ -186,19 +187,24 @@ static void ppc_excp_debug_sw_tlb(CPUPPCState *env, int excp)
>>>              env->error_code);
>>> }
>>>
>>> -static void powerpc_checkstop(CPUPPCState *env)
>>> +static void powerpc_checkstop(CPUPPCState *env, const char *reason)
>>> {
>>>     CPUState *cs = env_cpu(env);
>>>
>>> -    /* Machine check exception is not enabled. Enter checkstop state. */
>>> -    fprintf(stderr, "Machine check while not allowed. "
>>> -            "Entering checkstop state\n");
>>> +    vm_stop(RUN_STATE_GUEST_PANICKED);
>>> +
>>> +    fprintf(stderr, "Entering checkstop state: %s\n", reason);
>>> +    cpu_dump_state(cs, stderr, CPU_DUMP_FPU | CPU_DUMP_CCOP);
>>>     if (qemu_log_separate()) {
>>> -        qemu_log("Machine check while not allowed. "
>>> -                 "Entering checkstop state\n");
>>> +        FILE *logfile = qemu_log_trylock();
>>> +        if (logfile) {
>>> +            fprintf(logfile, "Entering checkstop state: %s\n", reason);
>>
>> I don't think you should have fprintfs here. Is this remnants of debug
>> code left here by mistake? The fprintf that was there before may also need
>> to be converted to some qemI_log or error_report but I did not know what
>> these are for and did not address that. But if you want to add more then
>> it may need to be solved first.
>
> I just followed existing fprintf use. Changing that should be separate
> patch indeed.

As this is the only printf here maybe cleaning it up should come before 
adding more.

>>> +            cpu_dump_state(cs, logfile, CPU_DUMP_FPU | CPU_DUMP_CCOP);
>>> +            qemu_log_unlock(logfile);
>>> +        }
>>>     }
>>> -    cs->halted = 1;
>>> -    cpu_interrupt_exittb(cs);
>>> +
>>
>> Excess blank line?
>
> No, it separates the logging block from function.
>
>>
>>> +    cpu_loop_exit_noexc(cs);
>>> }
>>>
>>> #if defined(TARGET_PPC64)
>>> @@ -483,7 +489,7 @@ static void powerpc_excp_40x(PowerPCCPU *cpu, int excp)
>>>         break;
>>>     case POWERPC_EXCP_MCHECK:    /* Machine check exception                  */
>>>         if (!FIELD_EX64(env->msr, MSR, ME)) {
>>> -            powerpc_checkstop(env);
>>> +            powerpc_checkstop(env, "machine check with MSR[ME]=0");
>>
>> If the message is always the same why pass it from here If the only other
>> option not used yet would be MSR[ME]=1 then that could also be checked in
>> the func so no need to pass the message. So is there any other possible
>> reason here?
>
> To make the checkstop function more general (e.g., used by the next patch).

OK, I did look where it's needed but missed that line in the next patch. 
That's not practical to check in the checkstop function then.

Regards,
BALATON Zoltan
Richard Henderson June 28, 2023, 9:33 a.m. UTC | #4
On 6/27/23 15:46, Nicholas Piggin wrote:
> +    vm_stop(RUN_STATE_GUEST_PANICKED);

Calling qemu_system_guest_panicked(NULL) seems to be more correct.

Though I'm not really sure the difference from cpu_abort(), which would also care for 
dumping cpu state.


r~
Nicholas Piggin June 29, 2023, 9:08 a.m. UTC | #5
On Wed Jun 28, 2023 at 7:33 PM AEST, Richard Henderson wrote:
> On 6/27/23 15:46, Nicholas Piggin wrote:
> > +    vm_stop(RUN_STATE_GUEST_PANICKED);
>
> Calling qemu_system_guest_panicked(NULL) seems to be more correct.

I'll have a look.

> Though I'm not really sure the difference from cpu_abort(), which would also care for 
> dumping cpu state.

cpu_abort() just kills qemu, so you can't inspect anything. This way
e.g., gdb server and monitor stay up. Seems like you can even re-start
it(?). This is close to what we want short of models for BMC and host
debug logic.

Thanks,
Nick
diff mbox series

Patch

diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 5beda973ce..28d8a9b212 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -19,6 +19,7 @@ 
 #include "qemu/osdep.h"
 #include "qemu/main-loop.h"
 #include "qemu/log.h"
+#include "sysemu/runstate.h"
 #include "cpu.h"
 #include "exec/exec-all.h"
 #include "internal.h"
@@ -186,19 +187,24 @@  static void ppc_excp_debug_sw_tlb(CPUPPCState *env, int excp)
              env->error_code);
 }
 
-static void powerpc_checkstop(CPUPPCState *env)
+static void powerpc_checkstop(CPUPPCState *env, const char *reason)
 {
     CPUState *cs = env_cpu(env);
 
-    /* Machine check exception is not enabled. Enter checkstop state. */
-    fprintf(stderr, "Machine check while not allowed. "
-            "Entering checkstop state\n");
+    vm_stop(RUN_STATE_GUEST_PANICKED);
+
+    fprintf(stderr, "Entering checkstop state: %s\n", reason);
+    cpu_dump_state(cs, stderr, CPU_DUMP_FPU | CPU_DUMP_CCOP);
     if (qemu_log_separate()) {
-        qemu_log("Machine check while not allowed. "
-                 "Entering checkstop state\n");
+        FILE *logfile = qemu_log_trylock();
+        if (logfile) {
+            fprintf(logfile, "Entering checkstop state: %s\n", reason);
+            cpu_dump_state(cs, logfile, CPU_DUMP_FPU | CPU_DUMP_CCOP);
+            qemu_log_unlock(logfile);
+        }
     }
-    cs->halted = 1;
-    cpu_interrupt_exittb(cs);
+
+    cpu_loop_exit_noexc(cs);
 }
 
 #if defined(TARGET_PPC64)
@@ -483,7 +489,7 @@  static void powerpc_excp_40x(PowerPCCPU *cpu, int excp)
         break;
     case POWERPC_EXCP_MCHECK:    /* Machine check exception                  */
         if (!FIELD_EX64(env->msr, MSR, ME)) {
-            powerpc_checkstop(env);
+            powerpc_checkstop(env, "machine check with MSR[ME]=0");
         }
         /* machine check exceptions don't have ME set */
         new_msr &= ~((target_ulong)1 << MSR_ME);
@@ -602,7 +608,7 @@  static void powerpc_excp_6xx(PowerPCCPU *cpu, int excp)
         break;
     case POWERPC_EXCP_MCHECK:    /* Machine check exception                  */
         if (!FIELD_EX64(env->msr, MSR, ME)) {
-            powerpc_checkstop(env);
+            powerpc_checkstop(env, "machine check with MSR[ME]=0");
         }
         /* machine check exceptions don't have ME set */
         new_msr &= ~((target_ulong)1 << MSR_ME);
@@ -763,7 +769,7 @@  static void powerpc_excp_7xx(PowerPCCPU *cpu, int excp)
     switch (excp) {
     case POWERPC_EXCP_MCHECK:    /* Machine check exception                  */
         if (!FIELD_EX64(env->msr, MSR, ME)) {
-            powerpc_checkstop(env);
+            powerpc_checkstop(env, "machine check with MSR[ME]=0");
         }
         /* machine check exceptions don't have ME set */
         new_msr &= ~((target_ulong)1 << MSR_ME);
@@ -936,7 +942,7 @@  static void powerpc_excp_74xx(PowerPCCPU *cpu, int excp)
     switch (excp) {
     case POWERPC_EXCP_MCHECK:    /* Machine check exception                  */
         if (!FIELD_EX64(env->msr, MSR, ME)) {
-            powerpc_checkstop(env);
+            powerpc_checkstop(env, "machine check with MSR[ME]=0");
         }
         /* machine check exceptions don't have ME set */
         new_msr &= ~((target_ulong)1 << MSR_ME);
@@ -1119,7 +1125,7 @@  static void powerpc_excp_booke(PowerPCCPU *cpu, int excp)
         break;
     case POWERPC_EXCP_MCHECK:    /* Machine check exception                  */
         if (!FIELD_EX64(env->msr, MSR, ME)) {
-            powerpc_checkstop(env);
+            powerpc_checkstop(env, "machine check with MSR[ME]=0");
         }
         /* machine check exceptions don't have ME set */
         new_msr &= ~((target_ulong)1 << MSR_ME);
@@ -1424,7 +1430,7 @@  static void powerpc_excp_books(PowerPCCPU *cpu, int excp)
     switch (excp) {
     case POWERPC_EXCP_MCHECK:    /* Machine check exception                  */
         if (!FIELD_EX64(env->msr, MSR, ME)) {
-            powerpc_checkstop(env);
+            powerpc_checkstop(env, "machine check with MSR[ME]=0");
         }
         if (env->msr_mask & MSR_HVB) {
             /*