diff mbox series

[v3,3/3] spapr: nvdimm: Enable sync-dax device property for nvdimm

Message ID 161650726635.2959.677683611241665210.stgit@6532096d84d3
State Not Applicable
Headers show
Series spapr: nvdimm: Enable sync-dax property for nvdimm | expand

Commit Message

Shivaprasad G Bhat March 23, 2021, 1:47 p.m. UTC
The patch adds the 'sync-dax' property to the nvdimm device.

When the sync-dax is 'off', the device tree property
"hcall-flush-required" is added to the nvdimm node which makes the
guest to issue H_SCM_FLUSH hcalls to request for flushes explicitly.
This would be the default behaviour without sync-dax property set
for the nvdimm device.

The sync-dax="on" would mean the guest need not make flush requests
to the qemu. On previous machine versions the sync-dax is set to be
"on" by default using the hw_compat magic.

Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
---
 hw/core/machine.c       |    1 +
 hw/mem/nvdimm.c         |    1 +
 hw/ppc/spapr_nvdimm.c   |   17 +++++++++++++++++
 include/hw/mem/nvdimm.h |   10 ++++++++++
 include/hw/ppc/spapr.h  |    1 +
 5 files changed, 30 insertions(+)

Comments

David Gibson March 24, 2021, 3:09 a.m. UTC | #1
On Tue, Mar 23, 2021 at 09:47:55AM -0400, Shivaprasad G Bhat wrote:
> The patch adds the 'sync-dax' property to the nvdimm device.
> 
> When the sync-dax is 'off', the device tree property
> "hcall-flush-required" is added to the nvdimm node which makes the
> guest to issue H_SCM_FLUSH hcalls to request for flushes explicitly.
> This would be the default behaviour without sync-dax property set
> for the nvdimm device.
> 
> The sync-dax="on" would mean the guest need not make flush requests
> to the qemu. On previous machine versions the sync-dax is set to be
> "on" by default using the hw_compat magic.
> 
> Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
> ---
>  hw/core/machine.c       |    1 +
>  hw/mem/nvdimm.c         |    1 +
>  hw/ppc/spapr_nvdimm.c   |   17 +++++++++++++++++
>  include/hw/mem/nvdimm.h |   10 ++++++++++
>  include/hw/ppc/spapr.h  |    1 +
>  5 files changed, 30 insertions(+)
> 
> diff --git a/hw/core/machine.c b/hw/core/machine.c
> index 257a664ea2..f843643574 100644
> --- a/hw/core/machine.c
> +++ b/hw/core/machine.c
> @@ -41,6 +41,7 @@ GlobalProperty hw_compat_5_2[] = {
>      { "PIIX4_PM", "smm-compat", "on"},
>      { "virtio-blk-device", "report-discard-granularity", "off" },
>      { "virtio-net-pci", "vectors", "3"},
> +    { "nvdimm", "sync-dax", "on" },
>  };
>  const size_t hw_compat_5_2_len = G_N_ELEMENTS(hw_compat_5_2);
>  
> diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
> index 7397b67156..8f0e29b191 100644
> --- a/hw/mem/nvdimm.c
> +++ b/hw/mem/nvdimm.c
> @@ -229,6 +229,7 @@ static void nvdimm_write_label_data(NVDIMMDevice *nvdimm, const void *buf,
>  
>  static Property nvdimm_properties[] = {
>      DEFINE_PROP_BOOL(NVDIMM_UNARMED_PROP, NVDIMMDevice, unarmed, false),
> +    DEFINE_PROP_BOOL(NVDIMM_SYNC_DAX_PROP, NVDIMMDevice, sync_dax, false),

I'm a bit uncomfortable adding this base NVDIMM property without at
least some logic about how it's handled on non-PAPR platforms.

>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c
> index 883317c1ed..dd1c90251b 100644
> --- a/hw/ppc/spapr_nvdimm.c
> +++ b/hw/ppc/spapr_nvdimm.c
> @@ -125,6 +125,9 @@ static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt,
>      uint64_t lsize = nvdimm->label_size;
>      uint64_t size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
>                                              NULL);
> +    bool sync_dax = object_property_get_bool(OBJECT(nvdimm),
> +                                             NVDIMM_SYNC_DAX_PROP,
> +                                             &error_abort);
>  
>      drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot);
>      g_assert(drc);
> @@ -159,6 +162,11 @@ static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt,
>                               "operating-system")));
>      _FDT(fdt_setprop(fdt, child_offset, "ibm,cache-flush-required", NULL, 0));
>  
> +    if (!sync_dax) {
> +        _FDT(fdt_setprop(fdt, child_offset, "ibm,hcall-flush-required",
> +                         NULL, 0));
> +    }
> +
>      return child_offset;
>  }
>  
> @@ -567,10 +575,12 @@ static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr,
>                                        target_ulong opcode, target_ulong *args)
>  {
>      int ret;
> +    bool sync_dax;
>      uint32_t drc_index = args[0];
>      uint64_t continue_token = args[1];
>      SpaprDrc *drc = spapr_drc_by_index(drc_index);
>      PCDIMMDevice *dimm;
> +    NVDIMMDevice *nvdimm;
>      HostMemoryBackend *backend = NULL;
>      SpaprNVDIMMDeviceFlushState *state;
>      ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
> @@ -580,6 +590,13 @@ static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr,
>          return H_PARAMETER;
>      }
>  
> +    nvdimm = NVDIMM(drc->dev);
> +    sync_dax = object_property_get_bool(OBJECT(nvdimm), NVDIMM_SYNC_DAX_PROP,
> +                                        &error_abort);
> +    if (sync_dax) {
> +        return H_UNSUPPORTED;

Do you want to return UNSUPPORTED here, or just H_SUCCESS, since the
flush should be a no-op in this case.

> +    }
> +
>      if (continue_token != 0) {
>          ret = spapr_nvdimm_get_flush_status(continue_token);
>          if (H_IS_LONG_BUSY(ret)) {
> diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h
> index bcf62f825c..f82979cf2f 100644
> --- a/include/hw/mem/nvdimm.h
> +++ b/include/hw/mem/nvdimm.h
> @@ -51,6 +51,7 @@ OBJECT_DECLARE_TYPE(NVDIMMDevice, NVDIMMClass, NVDIMM)
>  #define NVDIMM_LABEL_SIZE_PROP "label-size"
>  #define NVDIMM_UUID_PROP       "uuid"
>  #define NVDIMM_UNARMED_PROP    "unarmed"
> +#define NVDIMM_SYNC_DAX_PROP   "sync-dax"
>  
>  struct NVDIMMDevice {
>      /* private */
> @@ -85,6 +86,15 @@ struct NVDIMMDevice {
>       */
>      bool unarmed;
>  
> +    /*
> +     * On PPC64,
> +     * The 'off' value results in the hcall-flush-required property set
> +     * in the device tree for pseries machines. When 'off', the guest
> +     * initiates explicit flush requests to the backend device ensuring
> +     * write persistence.
> +     */
> +    bool sync_dax;
> +
>      /*
>       * The PPC64 - spapr requires each nvdimm device have a uuid.
>       */
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 7c27fb3e2d..51c35488a4 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -333,6 +333,7 @@ struct SpaprMachineState {
>  #define H_P7              -60
>  #define H_P8              -61
>  #define H_P9              -62
> +#define H_UNSUPPORTED     -67
>  #define H_OVERLAP         -68
>  #define H_UNSUPPORTED_FLAG -256
>  #define H_MULTI_THREADS_ACTIVE -9005
> 
>
Aneesh Kumar K V March 24, 2021, 4:09 a.m. UTC | #2
On 3/24/21 8:39 AM, David Gibson wrote:
> On Tue, Mar 23, 2021 at 09:47:55AM -0400, Shivaprasad G Bhat wrote:
>> The patch adds the 'sync-dax' property to the nvdimm device.
>>
>> When the sync-dax is 'off', the device tree property
>> "hcall-flush-required" is added to the nvdimm node which makes the
>> guest to issue H_SCM_FLUSH hcalls to request for flushes explicitly.
>> This would be the default behaviour without sync-dax property set
>> for the nvdimm device.
>>
>> The sync-dax="on" would mean the guest need not make flush requests
>> to the qemu. On previous machine versions the sync-dax is set to be
>> "on" by default using the hw_compat magic.
>>
>> Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
>> ---
>>   hw/core/machine.c       |    1 +
>>   hw/mem/nvdimm.c         |    1 +
>>   hw/ppc/spapr_nvdimm.c   |   17 +++++++++++++++++
>>   include/hw/mem/nvdimm.h |   10 ++++++++++
>>   include/hw/ppc/spapr.h  |    1 +
>>   5 files changed, 30 insertions(+)
>>
>> diff --git a/hw/core/machine.c b/hw/core/machine.c
>> index 257a664ea2..f843643574 100644
>> --- a/hw/core/machine.c
>> +++ b/hw/core/machine.c
>> @@ -41,6 +41,7 @@ GlobalProperty hw_compat_5_2[] = {
>>       { "PIIX4_PM", "smm-compat", "on"},
>>       { "virtio-blk-device", "report-discard-granularity", "off" },
>>       { "virtio-net-pci", "vectors", "3"},
>> +    { "nvdimm", "sync-dax", "on" },
>>   };
>>   const size_t hw_compat_5_2_len = G_N_ELEMENTS(hw_compat_5_2);
>>   
>> diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
>> index 7397b67156..8f0e29b191 100644
>> --- a/hw/mem/nvdimm.c
>> +++ b/hw/mem/nvdimm.c
>> @@ -229,6 +229,7 @@ static void nvdimm_write_label_data(NVDIMMDevice *nvdimm, const void *buf,
>>   
>>   static Property nvdimm_properties[] = {
>>       DEFINE_PROP_BOOL(NVDIMM_UNARMED_PROP, NVDIMMDevice, unarmed, false),
>> +    DEFINE_PROP_BOOL(NVDIMM_SYNC_DAX_PROP, NVDIMMDevice, sync_dax, false),
> 
> I'm a bit uncomfortable adding this base NVDIMM property without at
> least some logic about how it's handled on non-PAPR platforms.

yes these should be specific to PAPR. These are there to handle 
migration. with older guest. We can use the backing file to determine 
synchronous dax support. if it is a file backed nvdimm on a fsdax mount 
point, we can do synchronous dax. If it is one on a non dax file system 
synchronous dax can be disabled.

> 
>>       DEFINE_PROP_END_OF_LIST(),
>>   };
>>   
>> diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c
>> index 883317c1ed..dd1c90251b 100644
>> --- a/hw/ppc/spapr_nvdimm.c
>> +++ b/hw/ppc/spapr_nvdimm.c
>> @@ -125,6 +125,9 @@ static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt,
>>       uint64_t lsize = nvdimm->label_size;
>>       uint64_t size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
>>                                               NULL);
>> +    bool sync_dax = object_property_get_bool(OBJECT(nvdimm),
>> +                                             NVDIMM_SYNC_DAX_PROP,
>> +                                             &error_abort);
>>   
>>       drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot);
>>       g_assert(drc);
>> @@ -159,6 +162,11 @@ static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt,
>>                                "operating-system")));
>>       _FDT(fdt_setprop(fdt, child_offset, "ibm,cache-flush-required", NULL, 0));
>>   
>> +    if (!sync_dax) {
>> +        _FDT(fdt_setprop(fdt, child_offset, "ibm,hcall-flush-required",
>> +                         NULL, 0));
>> +    }
>> +
>>       return child_offset;
>>   }
>>   
>> @@ -567,10 +575,12 @@ static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr,
>>                                         target_ulong opcode, target_ulong *args)
>>   {
>>       int ret;
>> +    bool sync_dax;
>>       uint32_t drc_index = args[0];
>>       uint64_t continue_token = args[1];
>>       SpaprDrc *drc = spapr_drc_by_index(drc_index);
>>       PCDIMMDevice *dimm;
>> +    NVDIMMDevice *nvdimm;
>>       HostMemoryBackend *backend = NULL;
>>       SpaprNVDIMMDeviceFlushState *state;
>>       ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
>> @@ -580,6 +590,13 @@ static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr,
>>           return H_PARAMETER;
>>       }
>>   
>> +    nvdimm = NVDIMM(drc->dev);
>> +    sync_dax = object_property_get_bool(OBJECT(nvdimm), NVDIMM_SYNC_DAX_PROP,
>> +                                        &error_abort);
>> +    if (sync_dax) {
>> +        return H_UNSUPPORTED;
> 
> Do you want to return UNSUPPORTED here, or just H_SUCCESS, since the
> flush should be a no-op in this case.


The reason to handle this as error is to indicate the OS that it is 
using a wrong mechanism to flush.

> 
>> +    }
>> +
>>       if (continue_token != 0) {
>>           ret = spapr_nvdimm_get_flush_status(continue_token);
>>           if (H_IS_LONG_BUSY(ret)) {
>> diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h
>> index bcf62f825c..f82979cf2f 100644
>> --- a/include/hw/mem/nvdimm.h
>> +++ b/include/hw/mem/nvdimm.h
>> @@ -51,6 +51,7 @@ OBJECT_DECLARE_TYPE(NVDIMMDevice, NVDIMMClass, NVDIMM)
>>   #define NVDIMM_LABEL_SIZE_PROP "label-size"
>>   #define NVDIMM_UUID_PROP       "uuid"
>>   #define NVDIMM_UNARMED_PROP    "unarmed"
>> +#define NVDIMM_SYNC_DAX_PROP   "sync-dax"
>>   
>>   struct NVDIMMDevice {
>>       /* private */
>> @@ -85,6 +86,15 @@ struct NVDIMMDevice {
>>        */
>>       bool unarmed;
>>   
>> +    /*
>> +     * On PPC64,
>> +     * The 'off' value results in the hcall-flush-required property set
>> +     * in the device tree for pseries machines. When 'off', the guest
>> +     * initiates explicit flush requests to the backend device ensuring
>> +     * write persistence.
>> +     */
>> +    bool sync_dax;
>> +
>>       /*
>>        * The PPC64 - spapr requires each nvdimm device have a uuid.
>>        */
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index 7c27fb3e2d..51c35488a4 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -333,6 +333,7 @@ struct SpaprMachineState {
>>   #define H_P7              -60
>>   #define H_P8              -61
>>   #define H_P9              -62
>> +#define H_UNSUPPORTED     -67
>>   #define H_OVERLAP         -68
>>   #define H_UNSUPPORTED_FLAG -256
>>   #define H_MULTI_THREADS_ACTIVE -9005
>>
>>
>
diff mbox series

Patch

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 257a664ea2..f843643574 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -41,6 +41,7 @@  GlobalProperty hw_compat_5_2[] = {
     { "PIIX4_PM", "smm-compat", "on"},
     { "virtio-blk-device", "report-discard-granularity", "off" },
     { "virtio-net-pci", "vectors", "3"},
+    { "nvdimm", "sync-dax", "on" },
 };
 const size_t hw_compat_5_2_len = G_N_ELEMENTS(hw_compat_5_2);
 
diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
index 7397b67156..8f0e29b191 100644
--- a/hw/mem/nvdimm.c
+++ b/hw/mem/nvdimm.c
@@ -229,6 +229,7 @@  static void nvdimm_write_label_data(NVDIMMDevice *nvdimm, const void *buf,
 
 static Property nvdimm_properties[] = {
     DEFINE_PROP_BOOL(NVDIMM_UNARMED_PROP, NVDIMMDevice, unarmed, false),
+    DEFINE_PROP_BOOL(NVDIMM_SYNC_DAX_PROP, NVDIMMDevice, sync_dax, false),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c
index 883317c1ed..dd1c90251b 100644
--- a/hw/ppc/spapr_nvdimm.c
+++ b/hw/ppc/spapr_nvdimm.c
@@ -125,6 +125,9 @@  static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt,
     uint64_t lsize = nvdimm->label_size;
     uint64_t size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
                                             NULL);
+    bool sync_dax = object_property_get_bool(OBJECT(nvdimm),
+                                             NVDIMM_SYNC_DAX_PROP,
+                                             &error_abort);
 
     drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot);
     g_assert(drc);
@@ -159,6 +162,11 @@  static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt,
                              "operating-system")));
     _FDT(fdt_setprop(fdt, child_offset, "ibm,cache-flush-required", NULL, 0));
 
+    if (!sync_dax) {
+        _FDT(fdt_setprop(fdt, child_offset, "ibm,hcall-flush-required",
+                         NULL, 0));
+    }
+
     return child_offset;
 }
 
@@ -567,10 +575,12 @@  static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr,
                                       target_ulong opcode, target_ulong *args)
 {
     int ret;
+    bool sync_dax;
     uint32_t drc_index = args[0];
     uint64_t continue_token = args[1];
     SpaprDrc *drc = spapr_drc_by_index(drc_index);
     PCDIMMDevice *dimm;
+    NVDIMMDevice *nvdimm;
     HostMemoryBackend *backend = NULL;
     SpaprNVDIMMDeviceFlushState *state;
     ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
@@ -580,6 +590,13 @@  static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr,
         return H_PARAMETER;
     }
 
+    nvdimm = NVDIMM(drc->dev);
+    sync_dax = object_property_get_bool(OBJECT(nvdimm), NVDIMM_SYNC_DAX_PROP,
+                                        &error_abort);
+    if (sync_dax) {
+        return H_UNSUPPORTED;
+    }
+
     if (continue_token != 0) {
         ret = spapr_nvdimm_get_flush_status(continue_token);
         if (H_IS_LONG_BUSY(ret)) {
diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h
index bcf62f825c..f82979cf2f 100644
--- a/include/hw/mem/nvdimm.h
+++ b/include/hw/mem/nvdimm.h
@@ -51,6 +51,7 @@  OBJECT_DECLARE_TYPE(NVDIMMDevice, NVDIMMClass, NVDIMM)
 #define NVDIMM_LABEL_SIZE_PROP "label-size"
 #define NVDIMM_UUID_PROP       "uuid"
 #define NVDIMM_UNARMED_PROP    "unarmed"
+#define NVDIMM_SYNC_DAX_PROP   "sync-dax"
 
 struct NVDIMMDevice {
     /* private */
@@ -85,6 +86,15 @@  struct NVDIMMDevice {
      */
     bool unarmed;
 
+    /*
+     * On PPC64,
+     * The 'off' value results in the hcall-flush-required property set
+     * in the device tree for pseries machines. When 'off', the guest
+     * initiates explicit flush requests to the backend device ensuring
+     * write persistence.
+     */
+    bool sync_dax;
+
     /*
      * The PPC64 - spapr requires each nvdimm device have a uuid.
      */
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 7c27fb3e2d..51c35488a4 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -333,6 +333,7 @@  struct SpaprMachineState {
 #define H_P7              -60
 #define H_P8              -61
 #define H_P9              -62
+#define H_UNSUPPORTED     -67
 #define H_OVERLAP         -68
 #define H_UNSUPPORTED_FLAG -256
 #define H_MULTI_THREADS_ACTIVE -9005