diff mbox series

[v9,01/12] acpi/ghes: add a firmware file with HEST address

Message ID 34dd38395f29f57a19aef299bafdff9442830ed3.1724556967.git.mchehab+huawei@kernel.org
State New
Headers show
Series Add ACPI CPER firmware first error injection on ARM emulation | expand

Commit Message

Mauro Carvalho Chehab Aug. 25, 2024, 3:45 a.m. UTC
Store HEST table address at GPA, placing its content at
hest_addr_le variable.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

---

Change from v8:
- hest_addr_lr is now pointing to the error source size and data.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 hw/acpi/ghes.c         | 15 +++++++++++++++
 include/hw/acpi/ghes.h |  1 +
 2 files changed, 16 insertions(+)

Comments

Igor Mammedov Sept. 11, 2024, 1:51 p.m. UTC | #1
On Sun, 25 Aug 2024 05:45:56 +0200
Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:

> Store HEST table address at GPA, placing its content at
> hest_addr_le variable.
> 
> Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

This looks good to me.

in addition to this, it needs a patch on top to make sure
that we migrate hest_addr_le.
See a08a64627b6b 'ACPI: Record the Generic Error Status Block address'
and fixes on top of that for an example.

> ---
> 
> Change from v8:
> - hest_addr_lr is now pointing to the error source size and data.
> 
> Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
> ---
>  hw/acpi/ghes.c         | 15 +++++++++++++++
>  include/hw/acpi/ghes.h |  1 +
>  2 files changed, 16 insertions(+)
> 
> diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
> index e9511d9b8f71..529c14e3289f 100644
> --- a/hw/acpi/ghes.c
> +++ b/hw/acpi/ghes.c
> @@ -30,6 +30,7 @@
>  
>  #define ACPI_GHES_ERRORS_FW_CFG_FILE        "etc/hardware_errors"
>  #define ACPI_GHES_DATA_ADDR_FW_CFG_FILE     "etc/hardware_errors_addr"
> +#define ACPI_HEST_ADDR_FW_CFG_FILE          "etc/acpi_table_hest_addr"
>  
>  /* The max size in bytes for one error block */
>  #define ACPI_GHES_MAX_RAW_DATA_LENGTH   (1 * KiB)
> @@ -367,11 +368,22 @@ void acpi_build_hest(GArray *table_data, BIOSLinker *linker,
>  
>      acpi_table_begin(&table, table_data);
>  
> +    int hest_offset = table_data->len;
> +
>      /* Error Source Count */
>      build_append_int_noprefix(table_data, ACPI_GHES_ERROR_SOURCE_COUNT, 4);
>      build_ghes_v2(table_data, ACPI_HEST_SRC_ID_SEA, linker);
>  
>      acpi_table_end(linker, &table);
> +
> +    /*
> +     * tell firmware to write into GPA the address of HEST via fw_cfg,
> +     * once initialized.
> +     */
> +    bios_linker_loader_write_pointer(linker,
> +                                     ACPI_HEST_ADDR_FW_CFG_FILE, 0,
> +                                     sizeof(uint64_t),
> +                                     ACPI_BUILD_TABLE_FILE, hest_offset);
>  }
>  
>  void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s,
> @@ -385,6 +397,9 @@ void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s,
>      fw_cfg_add_file_callback(s, ACPI_GHES_DATA_ADDR_FW_CFG_FILE, NULL, NULL,
>          NULL, &(ags->ghes_addr_le), sizeof(ags->ghes_addr_le), false);
>  
> +    fw_cfg_add_file_callback(s, ACPI_HEST_ADDR_FW_CFG_FILE, NULL, NULL,
> +        NULL, &(ags->hest_addr_le), sizeof(ags->hest_addr_le), false);
> +
>      ags->present = true;
>  }
>  
> diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
> index 674f6958e905..28b956acb19a 100644
> --- a/include/hw/acpi/ghes.h
> +++ b/include/hw/acpi/ghes.h
> @@ -63,6 +63,7 @@ enum {
>  };
>  
>  typedef struct AcpiGhesState {
> +    uint64_t hest_addr_le;
>      uint64_t ghes_addr_le;
>      bool present; /* True if GHES is present at all on this board */
>  } AcpiGhesState;
Mauro Carvalho Chehab Sept. 13, 2024, 5:44 a.m. UTC | #2
Em Wed, 11 Sep 2024 15:51:08 +0200
Igor Mammedov <imammedo@redhat.com> escreveu:

> On Sun, 25 Aug 2024 05:45:56 +0200
> Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:
> 
> > Store HEST table address at GPA, placing its content at
> > hest_addr_le variable.
> > 
> > Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>  
> 
> This looks good to me.
> 
> in addition to this, it needs a patch on top to make sure
> that we migrate hest_addr_le.
> See a08a64627b6b 'ACPI: Record the Generic Error Status Block address'
> and fixes on top of that for an example.

Hmm... If I understood such change well, vmstate_ghes_state() will
use this structure as basis to do migration:

	/* ghes.h */
	typedef struct AcpiGhesState {
	    uint64_t hest_addr_le;
	    uint64_t ghes_addr_le;
	    bool present; /* True if GHES is present at all on this board */
	} AcpiGhesState;

	/* generic_event_device.c */
	static const VMStateDescription vmstate_ghes_state = {
	    .name = "acpi-ged/ghes",
	    .version_id = 1,
	    .minimum_version_id = 1,
	    .needed = ghes_needed,
	    .fields      = (VMStateField[]) {
	        VMSTATE_STRUCT(ghes_state, AcpiGedState, 1,
	                       vmstate_ghes_state, AcpiGhesState),
	        VMSTATE_END_OF_LIST()
	    }
	};

	/* hw/arm/virt-acpi-build.c */
	void virt_acpi_setup(VirtMachineState *vms)
	{
	    ...
	    if (vms->ras) {
	        assert(vms->acpi_dev);
	        acpi_ged_state = ACPI_GED(vms->acpi_dev);
	        acpi_ghes_add_fw_cfg(&acpi_ged_state->ghes_state,
	                             vms->fw_cfg, tables.hardware_errors);
	    }

Which relies on acpi_ghes_add_fw_cfg() function to setup callbacks for
the migration:

	/* ghes.c */
	void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s,
	                          GArray *hardware_error)
	{
	    /* Create a read-only fw_cfg file for GHES */
	    fw_cfg_add_file(s, ACPI_HW_ERROR_FW_CFG_FILE, hardware_error->data,
	                    hardware_error->len);

	    /* Create a read-write fw_cfg file for Address */
	    fw_cfg_add_file_callback(s, ACPI_HW_ERROR_ADDR_FW_CFG_FILE, NULL, NULL,
	        NULL, &(ags->ghes_addr_le), sizeof(ags->ghes_addr_le), false);

	    fw_cfg_add_file_callback(s, ACPI_HEST_ADDR_FW_CFG_FILE, NULL, NULL,
	        NULL, &(ags->hest_addr_le), sizeof(ags->hest_addr_le), false);

	    ags->present = true;
	}

It sounds to me that both ghes_addr_le and hest_addr_le will be migrated
altogether.

Did I miss something?

Thanks,
Mauro
Igor Mammedov Sept. 13, 2024, 1:25 p.m. UTC | #3
On Fri, 13 Sep 2024 07:44:35 +0200
Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:

> Em Wed, 11 Sep 2024 15:51:08 +0200
> Igor Mammedov <imammedo@redhat.com> escreveu:
> 
> > On Sun, 25 Aug 2024 05:45:56 +0200
> > Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:
> >   
> > > Store HEST table address at GPA, placing its content at
> > > hest_addr_le variable.
> > > 
> > > Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>    
> > 
> > This looks good to me.
> > 
> > in addition to this, it needs a patch on top to make sure
> > that we migrate hest_addr_le.
> > See a08a64627b6b 'ACPI: Record the Generic Error Status Block address'
> > and fixes on top of that for an example.  
> 
> Hmm... If I understood such change well, vmstate_ghes_state() will
> use this structure as basis to do migration:
> 
> 	/* ghes.h */
> 	typedef struct AcpiGhesState {
> 	    uint64_t hest_addr_le;
> 	    uint64_t ghes_addr_le;
> 	    bool present; /* True if GHES is present at all on this board */
> 	} AcpiGhesState;
> 
> 	/* generic_event_device.c */
> 	static const VMStateDescription vmstate_ghes_state = {
> 	    .name = "acpi-ged/ghes",
> 	    .version_id = 1,
> 	    .minimum_version_id = 1,
> 	    .needed = ghes_needed,
> 	    .fields      = (VMStateField[]) {
> 	        VMSTATE_STRUCT(ghes_state, AcpiGedState, 1,
> 	                       vmstate_ghes_state, AcpiGhesState),
> 	        VMSTATE_END_OF_LIST()
> 	    }
> 	};

current code looks like that:
                                                                                 
static const VMStateDescription vmstate_ghes = {                                 
    .name = "acpi-ghes",                                                         
    .version_id = 1,                                                             
    .minimum_version_id = 1,                                                     
    .fields = (const VMStateField[]) {                                           
        VMSTATE_UINT64(ghes_addr_le, AcpiGhesState),   <<===                         
        VMSTATE_END_OF_LIST()                                                    
    },                                                                           
};                                                                               
                                                                                 
static bool ghes_needed(void *opaque)                                            
{                                                                                
    AcpiGedState *s = opaque;                                                    
    return s->ghes_state.ghes_addr_le;                                           
}                                                                                
                                                                                 
static const VMStateDescription vmstate_ghes_state = {                           
    .name = "acpi-ged/ghes",                                                     
    .version_id = 1,                                                             
    .minimum_version_id = 1,                                                     
    .needed = ghes_needed,                                                       
    .fields = (const VMStateField[]) {                                           
        VMSTATE_STRUCT(ghes_state, AcpiGedState, 1,                              
                       vmstate_ghes, AcpiGhesState),                             
        VMSTATE_END_OF_LIST()                                                    
    }                                                                            
};  

where 
    VMSTATE_UINT64(ghes_addr_le, AcpiGhesState),
explicitly defines field(s) within structure to be sent over wire.

we need to add a conditional field for ghes_addr_le
which will be sent only with new machine types, but not with old ones
to avoid migration breakage.

I don't know much about migration, but maybe we can get away with
similar condition as in ghes_needed(), or enabling QMP error injection
based on machine type version.

Or maybe adding a CLI option to enable QMP error injection in which
case the explicit option would serve as a trigger enable QMP command and
to migrate hest_addr_le.
It might be even better this way, since machine wouldn't need to
carry extra error source that will be used only for testing
and practically never in production VMs (aka reduced attack surface).

You can easily test it locally:
  new-qemu: with your patches
  old-qemu: qemu-9.1

and then try to do forth & back migration for following cases:
  1. (ping-pong case with OLD firmware/ACPI tables)
     start old-qemu with 9.1 machine type ->
       migrate to file ->
       start new-qemu with 9.1 machine type -> restore from file ->
       migrate to file ->
       start old-qemu with 9.1 machine type ->restore from file ->
       
  2.  (ping-pong case with NEW firmware/ACPI tables)
      do the same as #1 but starting with new-qemu binary

(from upstream pov #2 is optional, but not implementing it
is pain for downstream so it's better to have it if it's not
too much work)

> 	/* hw/arm/virt-acpi-build.c */
> 	void virt_acpi_setup(VirtMachineState *vms)
> 	{
> 	    ...
> 	    if (vms->ras) {
> 	        assert(vms->acpi_dev);
> 	        acpi_ged_state = ACPI_GED(vms->acpi_dev);
> 	        acpi_ghes_add_fw_cfg(&acpi_ged_state->ghes_state,
> 	                             vms->fw_cfg, tables.hardware_errors);
> 	    }
> 
> Which relies on acpi_ghes_add_fw_cfg() function to setup callbacks for
> the migration:
> 
> 	/* ghes.c */
> 	void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s,
> 	                          GArray *hardware_error)
> 	{
> 	    /* Create a read-only fw_cfg file for GHES */
> 	    fw_cfg_add_file(s, ACPI_HW_ERROR_FW_CFG_FILE, hardware_error->data,
> 	                    hardware_error->len);
> 
> 	    /* Create a read-write fw_cfg file for Address */
> 	    fw_cfg_add_file_callback(s, ACPI_HW_ERROR_ADDR_FW_CFG_FILE, NULL, NULL,
> 	        NULL, &(ags->ghes_addr_le), sizeof(ags->ghes_addr_le), false);
> 
> 	    fw_cfg_add_file_callback(s, ACPI_HEST_ADDR_FW_CFG_FILE, NULL, NULL,
> 	        NULL, &(ags->hest_addr_le), sizeof(ags->hest_addr_le), false);
> 
> 	    ags->present = true;
> 	}
> 
> It sounds to me that both ghes_addr_le and hest_addr_le will be migrated
> altogether.

fwcfg callbacks are irrelevant to migration, they tell firmware what to do
with specified addresses (in our case, write into ags->hest_addr_le address
of HEST), so that HW (qemu) would know where firmware placed the table.
But that's about all it does.

> 
> Did I miss something?
> 
> Thanks,
> Mauro
>
Mauro Carvalho Chehab Sept. 14, 2024, 5:33 a.m. UTC | #4
Hi Igor,

Em Fri, 13 Sep 2024 15:25:18 +0200
Igor Mammedov <imammedo@redhat.com> escreveu:

> > > in addition to this, it needs a patch on top to make sure
> > > that we migrate hest_addr_le.
> > > See a08a64627b6b 'ACPI: Record the Generic Error Status Block address'
> > > and fixes on top of that for an example.    
> > 
> > Hmm... If I understood such change well, vmstate_ghes_state() will
> > use this structure as basis to do migration:
> > 
> > 	/* ghes.h */
> > 	typedef struct AcpiGhesState {
> > 	    uint64_t hest_addr_le;
> > 	    uint64_t ghes_addr_le;
> > 	    bool present; /* True if GHES is present at all on this board */
> > 	} AcpiGhesState;
> > 
> > 	/* generic_event_device.c */
> > 	static const VMStateDescription vmstate_ghes_state = {
> > 	    .name = "acpi-ged/ghes",
> > 	    .version_id = 1,
> > 	    .minimum_version_id = 1,
> > 	    .needed = ghes_needed,
> > 	    .fields      = (VMStateField[]) {
> > 	        VMSTATE_STRUCT(ghes_state, AcpiGedState, 1,
> > 	                       vmstate_ghes_state, AcpiGhesState),
> > 	        VMSTATE_END_OF_LIST()
> > 	    }
> > 	};  
> 
> current code looks like that:
>                                                                                  
> static const VMStateDescription vmstate_ghes = {                                 
>     .name = "acpi-ghes",                                                         
>     .version_id = 1,                                                             
>     .minimum_version_id = 1,                                                     
>     .fields = (const VMStateField[]) {                                           
>         VMSTATE_UINT64(ghes_addr_le, AcpiGhesState),   <<===                         
>         VMSTATE_END_OF_LIST()                                                    
>     },                                                                           
> };                                                                               
>                                                                                  
> static bool ghes_needed(void *opaque)                                            
> {                                                                                
>     AcpiGedState *s = opaque;                                                    
>     return s->ghes_state.ghes_addr_le;                                           
> }                                                                                
>                                                                                  
> static const VMStateDescription vmstate_ghes_state = {                           
>     .name = "acpi-ged/ghes",                                                     
>     .version_id = 1,                                                             
>     .minimum_version_id = 1,                                                     
>     .needed = ghes_needed,                                                       
>     .fields = (const VMStateField[]) {                                           
>         VMSTATE_STRUCT(ghes_state, AcpiGedState, 1,                              
>                        vmstate_ghes, AcpiGhesState),                             
>         VMSTATE_END_OF_LIST()                                                    
>     }                                                                            
> };  
> 
> where 
>     VMSTATE_UINT64(ghes_addr_le, AcpiGhesState),
> explicitly defines field(s) within structure to be sent over wire.
> 
> we need to add a conditional field for ghes_addr_le
> which will be sent only with new machine types, but not with old ones
> to avoid migration breakage.
> 
> I don't know much about migration, but maybe we can get away with
> similar condition as in ghes_needed(), or enabling QMP error injection
> based on machine type version.
> 
> Or maybe adding a CLI option to enable QMP error injection in which
> case the explicit option would serve as a trigger enable QMP command and
> to migrate hest_addr_le.
> It might be even better this way, since machine wouldn't need to
> carry extra error source that will be used only for testing
> and practically never in production VMs (aka reduced attack surface).
> 
> You can easily test it locally:
>   new-qemu: with your patches
>   old-qemu: qemu-9.1
> 
> and then try to do forth & back migration for following cases:
>   1. (ping-pong case with OLD firmware/ACPI tables)
>      start old-qemu with 9.1 machine type ->
>        migrate to file ->
>        start new-qemu with 9.1 machine type -> restore from file ->
>        migrate to file ->

As I never used migration, I'm a little stuck with the command line
parameters.

I guess I got the one to do the migration at the monitor:

	(qemu) migrate file://tmp/migrate

But no idea how to start a machine using a saved state.

>        start old-qemu with 9.1 machine type ->restore from file ->
>        
>   2.  (ping-pong case with NEW firmware/ACPI tables)
>       do the same as #1 but starting with new-qemu binary
> 
> (from upstream pov #2 is optional, but not implementing it
> is pain for downstream so it's better to have it if it's not
> too much work)

If I understood the migration documentation, every when new fields
are added, we should increment .version_id. If new version is
not backward-compatible, .minimum_version_id is also incremented.

So, for a migration-compatible code with a 9.1 VM, the code needs to
handle the case where hest_addr_le is not defined, e. g. use offsets
relative to ghes_addr_le, just like the current version, e.g.:

    uint64_t cper_addr, read_ack_start_addr;

    AcpiGedState *acpi_ged_state =
        ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED, NULL));
    AcpiGhesState *ags = &acpi_ged_state->ghes_state;

    if (!ags->hest_addr_le) {
        // Backward-compatible migration code
        uint64_t base = le64_to_cpu(ags->ghes_addr_le);

        *read_ack_start_addr = base +
            ACPI_GHES_ERROR_SOURCE_COUNT * sizeof(uint64_t) +
            error_source_to_index[notify] * sizeof(uint64_t);

        *cper_addr = base +
            ACPI_GHES_ERROR_SOURCE_COUNT * sizeof(uint64_t) +
            ACPI_GHES_ERROR_SOURCE_COUNT * sizeof(uint64_t) +
            error_source_to_index[notify] * ACPI_GHES_MAX_RAW_DATA_LENGTH;
   } else {
        // Use the new logic from ags->hest_addr_le
   }

There are two problems with that:

1. On your reviews, if I understood right, the code above is not
   migration safe. So, while implementing it would be technically
   correct, migration still won't work;

2. With the new code, ACPI_GHES_ERROR_SOURCE_COUNT is not
   defined anymore, as the size of the error source structure can
   be different on different architectures, being 2 for new
   VMs and 1 for old ones.

   Basically the new code gets it right because it can see a
   pointer to the HEST table, so it can get the number from there:

	hest_addr = le64_to_cpu(ags->hest_addr_le);
	cpu_physical_memory_read(hest_addr, &num_sources, sizeof(num_sources));

   But, without hest_addr_le, getting num_sources is not possible.

   An alternative would be to add a hacky code that works only for
   arm machines (as new versions may support more archs).

   Something like:
	#define V1_ARM_ACPI_GHES_ERROR_SOURCE_COUNT 1
	#define V2_ARM_ACPI_GHES_ERROR_SOURCE_COUNT 2

   And have a hardcoded logic that would work before/after this
   changeset but may break on newer versions, if the number of
   source IDs change, if we add other HEST types, etc.

   Now, assuming that such hack would work, it sounds too hacky to 
   my taste.

So, IMO it is a lot safer to not support migrations from v1 (only
ghes_addr_le), using a patch like the enclosed one to ensure that.

Btw, checking existing migration structs, it sounds that for almost all
structures, .version_id is identical to .minimum_version_id, meaning that
migration between different versions aren't supported on most cases.

Thanks,
Mauro

---

[PATCH] acpi/generic_event_device: Update GHES migration to cover hest addr

The GHES migration logic at GED should now support HEST table
location too.

Increase migration version and change needed to check for both
ghes_addr_le and hest_addr_le.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
index b4c83a089a02..efae0ff62c7b 100644
--- a/hw/acpi/generic_event_device.c
+++ b/hw/acpi/generic_event_device.c
@@ -351,10 +351,11 @@ static const VMStateDescription vmstate_ged_state = {
 
 static const VMStateDescription vmstate_ghes = {
     .name = "acpi-ghes",
-    .version_id = 1,
-    .minimum_version_id = 1,
+    .version_id = 2,
+    .minimum_version_id = 2,
     .fields = (const VMStateField[]) {
         VMSTATE_UINT64(ghes_addr_le, AcpiGhesState),
+        VMSTATE_UINT64(hest_addr_le, AcpiGhesState),
         VMSTATE_END_OF_LIST()
     },
 };
@@ -362,13 +363,13 @@ static const VMStateDescription vmstate_ghes = {
 static bool ghes_needed(void *opaque)
 {
     AcpiGedState *s = opaque;
-    return s->ghes_state.ghes_addr_le;
+    return s->ghes_state.ghes_addr_le && s->ghes_state.hest_addr_le;
 }
 
 static const VMStateDescription vmstate_ghes_state = {
     .name = "acpi-ged/ghes",
-    .version_id = 1,
-    .minimum_version_id = 1,
+    .version_id = 2,
+    .minimum_version_id = 2,
     .needed = ghes_needed,
     .fields = (const VMStateField[]) {
         VMSTATE_STRUCT(ghes_state, AcpiGedState, 1,
Igor Mammedov Sept. 16, 2024, 11:05 a.m. UTC | #5
On Sat, 14 Sep 2024 07:33:14 +0200
Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:

> Hi Igor,
> 
> Em Fri, 13 Sep 2024 15:25:18 +0200
> Igor Mammedov <imammedo@redhat.com> escreveu:
> 
> > > > in addition to this, it needs a patch on top to make sure
> > > > that we migrate hest_addr_le.
> > > > See a08a64627b6b 'ACPI: Record the Generic Error Status Block address'
> > > > and fixes on top of that for an example.      
> > > 
> > > Hmm... If I understood such change well, vmstate_ghes_state() will
> > > use this structure as basis to do migration:
> > > 
> > > 	/* ghes.h */
> > > 	typedef struct AcpiGhesState {
> > > 	    uint64_t hest_addr_le;
> > > 	    uint64_t ghes_addr_le;
> > > 	    bool present; /* True if GHES is present at all on this board */
> > > 	} AcpiGhesState;
> > > 
> > > 	/* generic_event_device.c */
> > > 	static const VMStateDescription vmstate_ghes_state = {
> > > 	    .name = "acpi-ged/ghes",
> > > 	    .version_id = 1,
> > > 	    .minimum_version_id = 1,
> > > 	    .needed = ghes_needed,
> > > 	    .fields      = (VMStateField[]) {
> > > 	        VMSTATE_STRUCT(ghes_state, AcpiGedState, 1,
> > > 	                       vmstate_ghes_state, AcpiGhesState),
> > > 	        VMSTATE_END_OF_LIST()
> > > 	    }
> > > 	};    
> > 
> > current code looks like that:
> >                                                                                  
> > static const VMStateDescription vmstate_ghes = {                                 
> >     .name = "acpi-ghes",                                                         
> >     .version_id = 1,                                                             
> >     .minimum_version_id = 1,                                                     
> >     .fields = (const VMStateField[]) {                                           
> >         VMSTATE_UINT64(ghes_addr_le, AcpiGhesState),   <<===                         
> >         VMSTATE_END_OF_LIST()                                                    
> >     },                                                                           
> > };                                                                               
> >                                                                                  
> > static bool ghes_needed(void *opaque)                                            
> > {                                                                                
> >     AcpiGedState *s = opaque;                                                    
> >     return s->ghes_state.ghes_addr_le;                                           
> > }                                                                                
> >                                                                                  
> > static const VMStateDescription vmstate_ghes_state = {                           
> >     .name = "acpi-ged/ghes",                                                     
> >     .version_id = 1,                                                             
> >     .minimum_version_id = 1,                                                     
> >     .needed = ghes_needed,                                                       
> >     .fields = (const VMStateField[]) {                                           
> >         VMSTATE_STRUCT(ghes_state, AcpiGedState, 1,                              
> >                        vmstate_ghes, AcpiGhesState),                             
> >         VMSTATE_END_OF_LIST()                                                    
> >     }                                                                            
> > };  
> > 
> > where 
> >     VMSTATE_UINT64(ghes_addr_le, AcpiGhesState),
> > explicitly defines field(s) within structure to be sent over wire.
> > 
> > we need to add a conditional field for ghes_addr_le
> > which will be sent only with new machine types, but not with old ones
> > to avoid migration breakage.
> > 
> > I don't know much about migration, but maybe we can get away with
> > similar condition as in ghes_needed(), or enabling QMP error injection
> > based on machine type version.
> > 
> > Or maybe adding a CLI option to enable QMP error injection in which
> > case the explicit option would serve as a trigger enable QMP command and
> > to migrate hest_addr_le.
> > It might be even better this way, since machine wouldn't need to
> > carry extra error source that will be used only for testing
> > and practically never in production VMs (aka reduced attack surface).
> > 
> > You can easily test it locally:
> >   new-qemu: with your patches
> >   old-qemu: qemu-9.1
> > 
> > and then try to do forth & back migration for following cases:
> >   1. (ping-pong case with OLD firmware/ACPI tables)
> >      start old-qemu with 9.1 machine type ->
> >        migrate to file ->
> >        start new-qemu with 9.1 machine type -> restore from file ->
> >        migrate to file ->  
> 
> As I never used migration, I'm a little stuck with the command line
> parameters.
> 
> I guess I got the one to do the migration at the monitor:
> 
> 	(qemu) migrate file://tmp/migrate
> 
> But no idea how to start a machine using a saved state.

see https://www.linux-kvm.org/page/Migration
'savevm/loadvm to an external state file (using pseudo-migration)' section

> 
> >        start old-qemu with 9.1 machine type ->restore from file ->
> >        
> >   2.  (ping-pong case with NEW firmware/ACPI tables)
> >       do the same as #1 but starting with new-qemu binary
> > 
> > (from upstream pov #2 is optional, but not implementing it
> > is pain for downstream so it's better to have it if it's not
> > too much work)  
> 
> If I understood the migration documentation, every when new fields
> are added, we should increment .version_id. If new version is
> not backward-compatible, .minimum_version_id is also incremented.
> 
> So, for a migration-compatible code with a 9.1 VM, the code needs to
> handle the case where hest_addr_le is not defined, e. g. use offsets
> relative to ghes_addr_le, just like the current version, e.g.:
> 
>     uint64_t cper_addr, read_ack_start_addr;
> 
>     AcpiGedState *acpi_ged_state =
>         ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED, NULL));
>     AcpiGhesState *ags = &acpi_ged_state->ghes_state;
> 
>     if (!ags->hest_addr_le) {
>         // Backward-compatible migration code
>         uint64_t base = le64_to_cpu(ags->ghes_addr_le);
> 
>         *read_ack_start_addr = base +
>             ACPI_GHES_ERROR_SOURCE_COUNT * sizeof(uint64_t) +
>             error_source_to_index[notify] * sizeof(uint64_t);
> 
>         *cper_addr = base +
>             ACPI_GHES_ERROR_SOURCE_COUNT * sizeof(uint64_t) +
>             ACPI_GHES_ERROR_SOURCE_COUNT * sizeof(uint64_t) +
>             error_source_to_index[notify] * ACPI_GHES_MAX_RAW_DATA_LENGTH;
>    } else {
>         // Use the new logic from ags->hest_addr_le
>    }
> 
> There are two problems with that:
> 
> 1. On your reviews, if I understood right, the code above is not
>    migration safe. So, while implementing it would be technically
>    correct, migration still won't work;
> 
> 2. With the new code, ACPI_GHES_ERROR_SOURCE_COUNT is not
>    defined anymore, as the size of the error source structure can
>    be different on different architectures, being 2 for new
>    VMs and 1 for old ones.
> 
>    Basically the new code gets it right because it can see a
>    pointer to the HEST table, so it can get the number from there:
> 
> 	hest_addr = le64_to_cpu(ags->hest_addr_le);
> 	cpu_physical_memory_read(hest_addr, &num_sources, sizeof(num_sources));
> 
>    But, without hest_addr_le, getting num_sources is not possible.
> 
>    An alternative would be to add a hacky code that works only for
>    arm machines (as new versions may support more archs).
> 
>    Something like:
> 	#define V1_ARM_ACPI_GHES_ERROR_SOURCE_COUNT 1
> 	#define V2_ARM_ACPI_GHES_ERROR_SOURCE_COUNT 2
> 
>    And have a hardcoded logic that would work before/after this
>    changeset but may break on newer versions, if the number of
>    source IDs change, if we add other HEST types, etc.
> 
>    Now, assuming that such hack would work, it sounds too hacky to 
>    my taste.
> 
> So, IMO it is a lot safer to not support migrations from v1 (only
> ghes_addr_le), using a patch like the enclosed one to ensure that.
> 
> Btw, checking existing migration structs, it sounds that for almost all
> structures, .version_id is identical to .minimum_version_id, meaning that
> migration between different versions aren't supported on most cases.

let me try to find more examples on how to implement migration bits
hopefully more comprehensible. 

> 
> Thanks,
> Mauro
> 
> ---
> 
> [PATCH] acpi/generic_event_device: Update GHES migration to cover hest addr
> 
> The GHES migration logic at GED should now support HEST table
> location too.
> 
> Increase migration version and change needed to check for both
> ghes_addr_le and hest_addr_le.
> 
> Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
> 
> diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
> index b4c83a089a02..efae0ff62c7b 100644
> --- a/hw/acpi/generic_event_device.c
> +++ b/hw/acpi/generic_event_device.c
> @@ -351,10 +351,11 @@ static const VMStateDescription vmstate_ged_state = {
>  
>  static const VMStateDescription vmstate_ghes = {
>      .name = "acpi-ghes",
> -    .version_id = 1,
> -    .minimum_version_id = 1,
> +    .version_id = 2,
> +    .minimum_version_id = 2,
>      .fields = (const VMStateField[]) {
>          VMSTATE_UINT64(ghes_addr_le, AcpiGhesState),
> +        VMSTATE_UINT64(hest_addr_le, AcpiGhesState),
>          VMSTATE_END_OF_LIST()
>      },
>  };
> @@ -362,13 +363,13 @@ static const VMStateDescription vmstate_ghes = {
>  static bool ghes_needed(void *opaque)
>  {
>      AcpiGedState *s = opaque;
> -    return s->ghes_state.ghes_addr_le;
> +    return s->ghes_state.ghes_addr_le && s->ghes_state.hest_addr_le;
>  }
>  
>  static const VMStateDescription vmstate_ghes_state = {
>      .name = "acpi-ged/ghes",
> -    .version_id = 1,
> -    .minimum_version_id = 1,
> +    .version_id = 2,
> +    .minimum_version_id = 2,
>      .needed = ghes_needed,
>      .fields = (const VMStateField[]) {
>          VMSTATE_STRUCT(ghes_state, AcpiGedState, 1,
> 
>
Mauro Carvalho Chehab Oct. 1, 2024, 8:54 a.m. UTC | #6
Em Mon, 16 Sep 2024 13:05:06 +0200
Igor Mammedov <imammedo@redhat.com> escreveu:

> > But no idea how to start a machine using a saved state.  
> 
> see https://www.linux-kvm.org/page/Migration
> 'savevm/loadvm to an external state file (using pseudo-migration)' section
> 

It didn't work. Is migration currently working between 9.1 and 9.2?

I did a compilation of qemu version v9.1.0-rc0 and saved the state.

Then, on vanilla 9.2 (changeset 01dc65a3bc26), I tried to restore the
state with both "virt" and "virt-9.1". None worked:


$ qemu-system-aarch64 -m 4g,maxmem=8G,slots=8 -M type=virt-9.1,nvdimm=on,gic-version=3,ras=on -cpu max -smp 4 -numa node,nodeid=0,cpus=0-3,memdev=mem0 --nographic -monitor telnet:127.0.0.1:1234,server,nowait -incoming "exec: gzip -c -d statefile.gz" -no-reboot -bios /new_devel/edac/emulator/QEMU_EFI-silent.fd -kernel /new_devel/edac/work/arm64_build/arch/arm64/boot/Image.gz -device pcie-root-port,id=root_port1 -device virtio-blk-pci,drive=hd -device virtio-net-pci,netdev=mynet,id=bob -drive if=none,file=/new_devel/edac/emulator/debian.qcow2,format=qcow2,id=hd -object memory-backend-ram,size=4G,id=mem0 -netdev type=user,id=mynet,hostfwd=tcp::5555-:22 -qmp tcp:localhost:4445,server=on,wait=off -append 'earlycon nomodeset root=/dev/vda1 fsck.mode=skip tp_printk maxcpus=4'
qemu-system-aarch64: error while loading state for instance 0x0 of device 'cpu'
qemu-system-aarch64: load of migration failed: Operation not permitted

$ qemu-system-aarch64 -m 4g,maxmem=8G,slots=8 -M type=virt,nvdimm=on,gic-version=3,ras=on -cpu max -smp 4 -numa node,nodeid=0,cpus=0-3,memdev=mem0 --nographic -monitor telnet:127.0.0.1:1234,server,nowait -incoming "exec: gzip -c -d statefile.gz" -no-reboot -bios /new_devel/edac/emulator/QEMU_EFI-silent.fd -kernel /new_devel/edac/work/arm64_build/arch/arm64/boot/Image.gz -device pcie-root-port,id=root_port1 -device virtio-blk-pci,drive=hd -device virtio-net-pci,netdev=mynet,id=bob -drive if=none,file=/new_devel/edac/emulator/debian.qcow2,format=qcow2,id=hd -object memory-backend-ram,size=4G,id=mem0 -netdev type=user,id=mynet,hostfwd=tcp::5555-:22 -qmp tcp:localhost:4445,server=on,wait=off -append 'earlycon nomodeset root=/dev/vda1 fsck.mode=skip tp_printk maxcpus=4'
qemu-system-aarch64: Machine type received is 'virt-9.1' and local is 'virt-9.2'
qemu-system-aarch64: load of migration failed: Invalid argument

Did I made something wrong?

Regards,
Mauro
Igor Mammedov Oct. 3, 2024, 11:48 a.m. UTC | #7
On Tue, 1 Oct 2024 10:54:26 +0200
Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:

> Em Mon, 16 Sep 2024 13:05:06 +0200
> Igor Mammedov <imammedo@redhat.com> escreveu:
> 
> > > But no idea how to start a machine using a saved state.    
> > 
> > see https://www.linux-kvm.org/page/Migration
> > 'savevm/loadvm to an external state file (using pseudo-migration)' section
> >   
> 
> It didn't work. Is migration currently working between 9.1 and 9.2?
> 
> I did a compilation of qemu version v9.1.0-rc0 and saved the state.
but it's better to use actually released 9.1 source code.
that should be effectively virt-9.1 machine type
(unless something changed between rc0 and release version)

show us CLI on source side (i.e. qemu instance where you saved state,
it should be the same as below modulo '-incoming' option)

> 
> Then, on vanilla 9.2 (changeset 01dc65a3bc26), I tried to restore the
> state with both "virt" and "virt-9.1". None worked:
> 
> 
> $ qemu-system-aarch64 -m 4g,maxmem=8G,slots=8 -M type=virt-9.1,nvdimm=on,gic-version=3,ras=on -cpu max -smp 4 -numa node,nodeid=0,cpus=0-3,memdev=mem0 --nographic -monitor telnet:127.0.0.1:1234,server,nowait -incoming "exec: gzip -c -d statefile.gz" -no-reboot -bios /new_devel/edac/emulator/QEMU_EFI-silent.fd -kernel /new_devel/edac/work/arm64_build/arch/arm64/boot/Image.gz -device pcie-root-port,id=root_port1 -device virtio-blk-pci,drive=hd -device virtio-net-pci,netdev=mynet,id=bob -drive if=none,file=/new_devel/edac/emulator/debian.qcow2,format=qcow2,id=hd -object memory-backend-ram,size=4G,id=mem0 -netdev type=user,id=mynet,hostfwd=tcp::5555-:22 -qmp tcp:localhost:4445,server=on,wait=off -append 'earlycon nomodeset root=/dev/vda1 fsck.mode=skip tp_printk maxcpus=4'
> qemu-system-aarch64: error while loading state for instance 0x0 of device 'cpu'
> qemu-system-aarch64: load of migration failed: Operation not permitted

and that should've worked

 
> $ qemu-system-aarch64 -m 4g,maxmem=8G,slots=8 -M type=virt,nvdimm=on,gic-version=3,ras=on -cpu max -smp 4 -numa node,nodeid=0,cpus=0-3,memdev=mem0 --nographic -monitor telnet:127.0.0.1:1234,server,nowait -incoming "exec: gzip -c -d statefile.gz" -no-reboot -bios /new_devel/edac/emulator/QEMU_EFI-silent.fd -kernel /new_devel/edac/work/arm64_build/arch/arm64/boot/Image.gz -device pcie-root-port,id=root_port1 -device virtio-blk-pci,drive=hd -device virtio-net-pci,netdev=mynet,id=bob -drive if=none,file=/new_devel/edac/emulator/debian.qcow2,format=qcow2,id=hd -object memory-backend-ram,size=4G,id=mem0 -netdev type=user,id=mynet,hostfwd=tcp::5555-:22 -qmp tcp:localhost:4445,server=on,wait=off -append 'earlycon nomodeset root=/dev/vda1 fsck.mode=skip tp_printk maxcpus=4'
> qemu-system-aarch64: Machine type received is 'virt-9.1' and local is 'virt-9.2'
> qemu-system-aarch64: load of migration failed: Invalid argument

that isn't meant to work.

> 
> Did I made something wrong?

let's see src CLI first, and then ask someone who knows more about migration and how to troubleshoot it

> Regards,
> Mauro
>
diff mbox series

Patch

diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
index e9511d9b8f71..529c14e3289f 100644
--- a/hw/acpi/ghes.c
+++ b/hw/acpi/ghes.c
@@ -30,6 +30,7 @@ 
 
 #define ACPI_GHES_ERRORS_FW_CFG_FILE        "etc/hardware_errors"
 #define ACPI_GHES_DATA_ADDR_FW_CFG_FILE     "etc/hardware_errors_addr"
+#define ACPI_HEST_ADDR_FW_CFG_FILE          "etc/acpi_table_hest_addr"
 
 /* The max size in bytes for one error block */
 #define ACPI_GHES_MAX_RAW_DATA_LENGTH   (1 * KiB)
@@ -367,11 +368,22 @@  void acpi_build_hest(GArray *table_data, BIOSLinker *linker,
 
     acpi_table_begin(&table, table_data);
 
+    int hest_offset = table_data->len;
+
     /* Error Source Count */
     build_append_int_noprefix(table_data, ACPI_GHES_ERROR_SOURCE_COUNT, 4);
     build_ghes_v2(table_data, ACPI_HEST_SRC_ID_SEA, linker);
 
     acpi_table_end(linker, &table);
+
+    /*
+     * tell firmware to write into GPA the address of HEST via fw_cfg,
+     * once initialized.
+     */
+    bios_linker_loader_write_pointer(linker,
+                                     ACPI_HEST_ADDR_FW_CFG_FILE, 0,
+                                     sizeof(uint64_t),
+                                     ACPI_BUILD_TABLE_FILE, hest_offset);
 }
 
 void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s,
@@ -385,6 +397,9 @@  void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s,
     fw_cfg_add_file_callback(s, ACPI_GHES_DATA_ADDR_FW_CFG_FILE, NULL, NULL,
         NULL, &(ags->ghes_addr_le), sizeof(ags->ghes_addr_le), false);
 
+    fw_cfg_add_file_callback(s, ACPI_HEST_ADDR_FW_CFG_FILE, NULL, NULL,
+        NULL, &(ags->hest_addr_le), sizeof(ags->hest_addr_le), false);
+
     ags->present = true;
 }
 
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
index 674f6958e905..28b956acb19a 100644
--- a/include/hw/acpi/ghes.h
+++ b/include/hw/acpi/ghes.h
@@ -63,6 +63,7 @@  enum {
 };
 
 typedef struct AcpiGhesState {
+    uint64_t hest_addr_le;
     uint64_t ghes_addr_le;
     bool present; /* True if GHES is present at all on this board */
 } AcpiGhesState;