Message ID | 20190614155626.27932-6-tao3.xu@intel.com |
---|---|
State | New |
Headers | show |
Series | Build ACPI Heterogeneous Memory Attribute Table (HMAT) | expand |
On Fri, 14 Jun 2019 23:56:23 +0800 Tao Xu <tao3.xu@intel.com> wrote: > Add build_mem_ranges callback to AcpiDeviceIfClass and use > it for generating SRAT and HMAT numa memory ranges. > > Suggested-by: Igor Mammedov <imammedo@redhat.com> > Co-developed-by: Liu Jingqi <jingqi.liu@intel.com> > Signed-off-by: Liu Jingqi <jingqi.liu@intel.com> > Signed-off-by: Tao Xu <tao3.xu@intel.com> > --- > > Changes in v5 -> v4: > - Add the missing if 'mem_len > 0' in pc_build_mem_ranges() (Igor) > - Correct the descriptions of build_mem_ranges > in AcpiDeviceIfClass (Igor) > - Use GArray for NUMA memory ranges data (Igor) > - Add the reason of using stub (Igor) > --- > hw/acpi/piix4.c | 1 + > hw/i386/acpi-build.c | 133 +++++++++++++++++---------- > hw/isa/lpc_ich9.c | 1 + > include/hw/acpi/acpi_dev_interface.h | 4 + > include/hw/i386/pc.h | 1 + > include/sysemu/numa.h | 12 +++ > stubs/Makefile.objs | 1 + > stubs/pc_build_mem_ranges.c | 14 +++ > 8 files changed, 120 insertions(+), 47 deletions(-) > create mode 100644 stubs/pc_build_mem_ranges.c > > diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c > index ec4e186cec..bc078c1ad7 100644 > --- a/hw/acpi/piix4.c > +++ b/hw/acpi/piix4.c > @@ -702,6 +702,7 @@ static void piix4_pm_class_init(ObjectClass *klass, void *data) > adevc->ospm_status = piix4_ospm_status; > adevc->send_event = piix4_send_gpe; > adevc->madt_cpu = pc_madt_cpu_entry; > + adevc->build_mem_ranges = pc_build_mem_ranges; > } > > static const TypeInfo piix4_pm_info = { > diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c > index 055e677c30..44dd447fa5 100644 > --- a/hw/i386/acpi-build.c > +++ b/hw/i386/acpi-build.c > @@ -2279,18 +2279,89 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog) > #define HOLE_640K_START (640 * KiB) > #define HOLE_640K_END (1 * MiB) > > +void pc_build_mem_ranges(AcpiDeviceIf *adev, MachineState *ms) > +{ > + uint64_t mem_len, mem_base, next_base; > + int i; > + PCMachineState *pcms = PC_MACHINE(ms); > + NumaState *nstat = ms->numa_state; > + NumaMemRange *mem_range; > + nstat->mem_ranges_num = 0; > + next_base = 0; > + > + /* > + * the memory map is a bit tricky, it contains at least one hole > + * from 640k-1M and possibly another one from 3.5G-4G. > + */ > + > + for (i = 0; i < pcms->numa_nodes; ++i) { > + mem_base = next_base; > + mem_len = pcms->node_mem[i]; > + next_base = mem_base + mem_len; > + > + /* Cut out the 640K hole */ > + if (mem_base <= HOLE_640K_START && > + next_base > HOLE_640K_START) { > + mem_len -= next_base - HOLE_640K_START; > + if (mem_len > 0) { > + mem_range = acpi_data_push(nstat->mem_ranges, > + sizeof *mem_range); > + mem_range->base = mem_base; > + mem_range->length = mem_len; > + mem_range->node = i; > + nstat->mem_ranges_num++; > + } > + > + /* Check for the rare case: 640K < RAM < 1M */ > + if (next_base <= HOLE_640K_END) { > + next_base = HOLE_640K_END; > + continue; > + } > + mem_base = HOLE_640K_END; > + mem_len = next_base - HOLE_640K_END; > + } > + > + /* Cut out the ACPI_PCI hole */ > + if (mem_base <= pcms->below_4g_mem_size && > + next_base > pcms->below_4g_mem_size) { > + mem_len -= next_base - pcms->below_4g_mem_size; > + if (mem_len > 0) { > + mem_range = acpi_data_push(nstat->mem_ranges, > + sizeof *mem_range); > + mem_range->base = mem_base; > + mem_range->length = mem_len; > + mem_range->node = i; > + nstat->mem_ranges_num++; > + } > + mem_base = 1ULL << 32; > + mem_len = next_base - pcms->below_4g_mem_size; > + next_base = mem_base + mem_len; > + } > + if (mem_len > 0) { > + mem_range = acpi_data_push(nstat->mem_ranges, > + sizeof *mem_range); > + mem_range->base = mem_base; > + mem_range->length = mem_len; > + mem_range->node = i; > + nstat->mem_ranges_num++; > + } > + } > +} > + > static void > build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) > { > AcpiSystemResourceAffinityTable *srat; > AcpiSratMemoryAffinity *numamem; > > - int i; > - int srat_start, numa_start, slots; > - uint64_t mem_len, mem_base, next_base; > + int i, srat_start, numa_start, slots; > MachineClass *mc = MACHINE_GET_CLASS(machine); > const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine); > PCMachineState *pcms = PC_MACHINE(machine); > + AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(pcms->acpi_dev); > + AcpiDeviceIf *adev = ACPI_DEVICE_IF(pcms->acpi_dev); > + NumaState *nstat = machine->numa_state; > + NumaMemRange *mem_range; > ram_addr_t hotplugabble_address_space_size = > object_property_get_int(OBJECT(pcms), PC_MACHINE_DEVMEM_REGION_SIZE, > NULL); > @@ -2327,57 +2398,25 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) > } > } > > + if (pcms->numa_nodes && !nstat->mem_ranges_num) { suggest to drop nstat->mem_ranges_num field and use nstat->mem_ranges->len instead, also its probably better to initialize nstat->mem_ranges at the same place where ms->numa_state is initialized, so that a specific platform code won't need to duplicate it. > + nstat->mem_ranges = g_array_new(false, true /* clear */, > + sizeof *mem_range); > + adevc->build_mem_ranges(adev, machine); > + } > > - /* the memory map is a bit tricky, it contains at least one hole > - * from 640k-1M and possibly another one from 3.5G-4G. > - */ > - next_base = 0; > numa_start = table_data->len; > > - for (i = 1; i < pcms->numa_nodes + 1; ++i) { > - mem_base = next_base; > - mem_len = pcms->node_mem[i - 1]; > - next_base = mem_base + mem_len; > - > - /* Cut out the 640K hole */ > - if (mem_base <= HOLE_640K_START && > - next_base > HOLE_640K_START) { > - mem_len -= next_base - HOLE_640K_START; > - if (mem_len > 0) { > - numamem = acpi_data_push(table_data, sizeof *numamem); > - build_srat_memory(numamem, mem_base, mem_len, i - 1, > - MEM_AFFINITY_ENABLED); > - } > - > - /* Check for the rare case: 640K < RAM < 1M */ > - if (next_base <= HOLE_640K_END) { > - next_base = HOLE_640K_END; > - continue; > - } > - mem_base = HOLE_640K_END; > - mem_len = next_base - HOLE_640K_END; > - } > - > - /* Cut out the ACPI_PCI hole */ > - if (mem_base <= pcms->below_4g_mem_size && > - next_base > pcms->below_4g_mem_size) { > - mem_len -= next_base - pcms->below_4g_mem_size; > - if (mem_len > 0) { > - numamem = acpi_data_push(table_data, sizeof *numamem); > - build_srat_memory(numamem, mem_base, mem_len, i - 1, > - MEM_AFFINITY_ENABLED); > - } > - mem_base = 1ULL << 32; > - mem_len = next_base - pcms->below_4g_mem_size; > - next_base = mem_base + mem_len; > - } > - > - if (mem_len > 0) { > + for (i = 0; i < nstat->mem_ranges_num; i++) { > + mem_range = &g_array_index(nstat->mem_ranges, NumaMemRange, i); > + if (mem_range->length > 0) { why do we have this condition, I'd assume adevc->build_mem_ranges() shouldn't return empty ranges. > numamem = acpi_data_push(table_data, sizeof *numamem); > - build_srat_memory(numamem, mem_base, mem_len, i - 1, > + build_srat_memory(numamem, mem_range->base, > + mem_range->length, > + mem_range->node, > MEM_AFFINITY_ENABLED); > } > } > + > slots = (table_data->len - numa_start) / sizeof *numamem; > for (; slots < pcms->numa_nodes + 2; slots++) { > numamem = acpi_data_push(table_data, sizeof *numamem); > diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c > index 35d17246e9..20d919c63d 100644 > --- a/hw/isa/lpc_ich9.c > +++ b/hw/isa/lpc_ich9.c > @@ -801,6 +801,7 @@ static void ich9_lpc_class_init(ObjectClass *klass, void *data) > adevc->ospm_status = ich9_pm_ospm_status; > adevc->send_event = ich9_send_gpe; > adevc->madt_cpu = pc_madt_cpu_entry; > + adevc->build_mem_ranges = pc_build_mem_ranges; > } > > static const TypeInfo ich9_lpc_info = { > diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h > index 43ff119179..5956b5ea33 100644 > --- a/include/hw/acpi/acpi_dev_interface.h > +++ b/include/hw/acpi/acpi_dev_interface.h > @@ -39,6 +39,8 @@ void acpi_send_event(DeviceState *dev, AcpiEventStatusBits event); > * for CPU indexed by @uid in @apic_ids array, > * returned structure types are: > * 0 - Local APIC, 9 - Local x2APIC, 0xB - GICC > + * build_mem_ranges: build memory ranges of ACPI SRAT (except misc > + * and hotplug SRAT ranges) and HMAT > * > * Interface is designed for providing unified interface > * to generic ACPI functionality that could be used without > @@ -54,5 +56,7 @@ typedef struct AcpiDeviceIfClass { > void (*send_event)(AcpiDeviceIf *adev, AcpiEventStatusBits ev); > void (*madt_cpu)(AcpiDeviceIf *adev, int uid, > const CPUArchIdList *apic_ids, GArray *entry); > + void (*build_mem_ranges)(AcpiDeviceIf *adev, MachineState *ms); > + > } AcpiDeviceIfClass; > #endif > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h > index 5d5636241e..21b9ac3d11 100644 > --- a/include/hw/i386/pc.h > +++ b/include/hw/i386/pc.h > @@ -281,6 +281,7 @@ void pc_system_firmware_init(PCMachineState *pcms, MemoryRegion *rom_memory); > /* acpi-build.c */ > void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, > const CPUArchIdList *apic_ids, GArray *entry); > +void pc_build_mem_ranges(AcpiDeviceIf *adev, MachineState *ms); > > /* e820 types */ > #define E820_RAM 1 > diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h > index 437eb21fef..e3c85b77bc 100644 > --- a/include/sysemu/numa.h > +++ b/include/sysemu/numa.h > @@ -20,6 +20,12 @@ struct NumaNodeMem { > uint64_t node_plugged_mem; > }; > > +typedef struct NumaMemRange { > + uint64_t base; > + uint64_t length; > + uint32_t node; > +} NumaMemRange; > + > struct NumaState { > /* Number of NUMA nodes */ > int num_nodes; > @@ -29,6 +35,12 @@ struct NumaState { > > /* NUMA nodes information */ > NodeInfo nodes[MAX_NODES]; > + > + /* Number of NUMA memory ranges */ > + uint32_t mem_ranges_num; > + > + /* NUMA memory ranges */ > + GArray *mem_ranges; > }; > typedef struct NumaState NumaState; > > diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs > index 9c7393b08c..4f0cdc1a45 100644 > --- a/stubs/Makefile.objs > +++ b/stubs/Makefile.objs > @@ -33,6 +33,7 @@ stub-obj-y += qmp_memory_device.o > stub-obj-y += target-monitor-defs.o > stub-obj-y += target-get-monitor-def.o > stub-obj-y += pc_madt_cpu_entry.o > +stub-obj-y += pc_build_mem_ranges.o > stub-obj-y += vmgenid.o > stub-obj-y += xen-common.o > stub-obj-y += xen-hvm.o > diff --git a/stubs/pc_build_mem_ranges.c b/stubs/pc_build_mem_ranges.c > new file mode 100644 > index 0000000000..997cdfe00b > --- /dev/null > +++ b/stubs/pc_build_mem_ranges.c > @@ -0,0 +1,14 @@ > +/* > + * Stub for pc_build_mem_ranges(). > + * piix4 is used not only pc, but also mips and etc. In order to add > + * build_mem_ranges callback to AcpiDeviceIfClass and use pc_build_mem_ranges > + * in hw/acpi/piix4.c, pc_build_mem_ranges() stub is added to make other arch > + * can compile successfully. > + */ > + > +#include "qemu/osdep.h" > +#include "hw/i386/pc.h" > + > +void pc_build_mem_ranges(AcpiDeviceIf *adev, MachineState *ms) > +{ > +}
On 7/1/2019 6:59 PM, Igor Mammedov wrote: > On Fri, 14 Jun 2019 23:56:23 +0800 > Tao Xu <tao3.xu@intel.com> wrote: > >> Add build_mem_ranges callback to AcpiDeviceIfClass and use >> it for generating SRAT and HMAT numa memory ranges. >> >> Suggested-by: Igor Mammedov <imammedo@redhat.com> >> Co-developed-by: Liu Jingqi <jingqi.liu@intel.com> >> Signed-off-by: Liu Jingqi <jingqi.liu@intel.com> >> Signed-off-by: Tao Xu <tao3.xu@intel.com> >> --- >> >> Changes in v5 -> v4: >> - Add the missing if 'mem_len > 0' in pc_build_mem_ranges() (Igor) >> - Correct the descriptions of build_mem_ranges >> in AcpiDeviceIfClass (Igor) >> - Use GArray for NUMA memory ranges data (Igor) >> - Add the reason of using stub (Igor) >> --- >> hw/acpi/piix4.c | 1 + >> hw/i386/acpi-build.c | 133 +++++++++++++++++---------- >> hw/isa/lpc_ich9.c | 1 + >> include/hw/acpi/acpi_dev_interface.h | 4 + >> include/hw/i386/pc.h | 1 + >> include/sysemu/numa.h | 12 +++ >> stubs/Makefile.objs | 1 + >> stubs/pc_build_mem_ranges.c | 14 +++ >> 8 files changed, 120 insertions(+), 47 deletions(-) >> create mode 100644 stubs/pc_build_mem_ranges.c >> >> diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c >> index ec4e186cec..bc078c1ad7 100644 >> --- a/hw/acpi/piix4.c >> +++ b/hw/acpi/piix4.c >> @@ -702,6 +702,7 @@ static void piix4_pm_class_init(ObjectClass *klass, void *data) >> adevc->ospm_status = piix4_ospm_status; >> adevc->send_event = piix4_send_gpe; >> adevc->madt_cpu = pc_madt_cpu_entry; >> + adevc->build_mem_ranges = pc_build_mem_ranges; >> } >> >> static const TypeInfo piix4_pm_info = { >> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c >> index 055e677c30..44dd447fa5 100644 >> --- a/hw/i386/acpi-build.c >> +++ b/hw/i386/acpi-build.c >> @@ -2279,18 +2279,89 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog) >> #define HOLE_640K_START (640 * KiB) >> #define HOLE_640K_END (1 * MiB) >> >> +void pc_build_mem_ranges(AcpiDeviceIf *adev, MachineState *ms) >> +{ >> + uint64_t mem_len, mem_base, next_base; >> + int i; >> + PCMachineState *pcms = PC_MACHINE(ms); >> + NumaState *nstat = ms->numa_state; >> + NumaMemRange *mem_range; >> + nstat->mem_ranges_num = 0; >> + next_base = 0; >> + >> + /* >> + * the memory map is a bit tricky, it contains at least one hole >> + * from 640k-1M and possibly another one from 3.5G-4G. >> + */ >> + >> + for (i = 0; i < pcms->numa_nodes; ++i) { >> + mem_base = next_base; >> + mem_len = pcms->node_mem[i]; >> + next_base = mem_base + mem_len; >> + >> + /* Cut out the 640K hole */ >> + if (mem_base <= HOLE_640K_START && >> + next_base > HOLE_640K_START) { >> + mem_len -= next_base - HOLE_640K_START; >> + if (mem_len > 0) { >> + mem_range = acpi_data_push(nstat->mem_ranges, >> + sizeof *mem_range); >> + mem_range->base = mem_base; >> + mem_range->length = mem_len; >> + mem_range->node = i; >> + nstat->mem_ranges_num++; >> + } >> + >> + /* Check for the rare case: 640K < RAM < 1M */ >> + if (next_base <= HOLE_640K_END) { >> + next_base = HOLE_640K_END; >> + continue; >> + } >> + mem_base = HOLE_640K_END; >> + mem_len = next_base - HOLE_640K_END; >> + } >> + >> + /* Cut out the ACPI_PCI hole */ >> + if (mem_base <= pcms->below_4g_mem_size && >> + next_base > pcms->below_4g_mem_size) { >> + mem_len -= next_base - pcms->below_4g_mem_size; >> + if (mem_len > 0) { >> + mem_range = acpi_data_push(nstat->mem_ranges, >> + sizeof *mem_range); >> + mem_range->base = mem_base; >> + mem_range->length = mem_len; >> + mem_range->node = i; >> + nstat->mem_ranges_num++; >> + } >> + mem_base = 1ULL << 32; >> + mem_len = next_base - pcms->below_4g_mem_size; >> + next_base = mem_base + mem_len; >> + } >> + if (mem_len > 0) { >> + mem_range = acpi_data_push(nstat->mem_ranges, >> + sizeof *mem_range); >> + mem_range->base = mem_base; >> + mem_range->length = mem_len; >> + mem_range->node = i; >> + nstat->mem_ranges_num++; >> + } >> + } >> +} >> + >> static void >> build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) >> { >> AcpiSystemResourceAffinityTable *srat; >> AcpiSratMemoryAffinity *numamem; >> >> - int i; >> - int srat_start, numa_start, slots; >> - uint64_t mem_len, mem_base, next_base; >> + int i, srat_start, numa_start, slots; >> MachineClass *mc = MACHINE_GET_CLASS(machine); >> const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine); >> PCMachineState *pcms = PC_MACHINE(machine); >> + AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(pcms->acpi_dev); >> + AcpiDeviceIf *adev = ACPI_DEVICE_IF(pcms->acpi_dev); >> + NumaState *nstat = machine->numa_state; >> + NumaMemRange *mem_range; >> ram_addr_t hotplugabble_address_space_size = >> object_property_get_int(OBJECT(pcms), PC_MACHINE_DEVMEM_REGION_SIZE, >> NULL); >> @@ -2327,57 +2398,25 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) >> } >> } >> >> + if (pcms->numa_nodes && !nstat->mem_ranges_num) { > suggest to drop nstat->mem_ranges_num field and use nstat->mem_ranges->len instead, > also its probably better to initialize nstat->mem_ranges > at the same place where ms->numa_state is initialized, so that > a specific platform code won't need to duplicate it. OK, I will improve it. > >> + nstat->mem_ranges = g_array_new(false, true /* clear */, >> + sizeof *mem_range); >> + adevc->build_mem_ranges(adev, machine); >> + } >> >> - /* the memory map is a bit tricky, it contains at least one hole >> - * from 640k-1M and possibly another one from 3.5G-4G. >> - */ >> - next_base = 0; >> numa_start = table_data->len; >> >> - for (i = 1; i < pcms->numa_nodes + 1; ++i) { >> - mem_base = next_base; >> - mem_len = pcms->node_mem[i - 1]; >> - next_base = mem_base + mem_len; >> - >> - /* Cut out the 640K hole */ >> - if (mem_base <= HOLE_640K_START && >> - next_base > HOLE_640K_START) { >> - mem_len -= next_base - HOLE_640K_START; >> - if (mem_len > 0) { >> - numamem = acpi_data_push(table_data, sizeof *numamem); >> - build_srat_memory(numamem, mem_base, mem_len, i - 1, >> - MEM_AFFINITY_ENABLED); >> - } >> - >> - /* Check for the rare case: 640K < RAM < 1M */ >> - if (next_base <= HOLE_640K_END) { >> - next_base = HOLE_640K_END; >> - continue; >> - } >> - mem_base = HOLE_640K_END; >> - mem_len = next_base - HOLE_640K_END; >> - } >> - >> - /* Cut out the ACPI_PCI hole */ >> - if (mem_base <= pcms->below_4g_mem_size && >> - next_base > pcms->below_4g_mem_size) { >> - mem_len -= next_base - pcms->below_4g_mem_size; >> - if (mem_len > 0) { >> - numamem = acpi_data_push(table_data, sizeof *numamem); >> - build_srat_memory(numamem, mem_base, mem_len, i - 1, >> - MEM_AFFINITY_ENABLED); >> - } >> - mem_base = 1ULL << 32; >> - mem_len = next_base - pcms->below_4g_mem_size; >> - next_base = mem_base + mem_len; >> - } >> - >> - if (mem_len > 0) { >> + for (i = 0; i < nstat->mem_ranges_num; i++) { >> + mem_range = &g_array_index(nstat->mem_ranges, NumaMemRange, i); >> + if (mem_range->length > 0) { > why do we have this condition, > I'd assume adevc->build_mem_ranges() shouldn't return empty ranges. > OK I will drop this condition. >> numamem = acpi_data_push(table_data, sizeof *numamem); >> - build_srat_memory(numamem, mem_base, mem_len, i - 1, >> + build_srat_memory(numamem, mem_range->base, >> + mem_range->length, >> + mem_range->node, >> MEM_AFFINITY_ENABLED); >> } >> } >> + >> slots = (table_data->len - numa_start) / sizeof *numamem; >> for (; slots < pcms->numa_nodes + 2; slots++) { >> numamem = acpi_data_push(table_data, sizeof *numamem); >> diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c >> index 35d17246e9..20d919c63d 100644 >> --- a/hw/isa/lpc_ich9.c >> +++ b/hw/isa/lpc_ich9.c >> @@ -801,6 +801,7 @@ static void ich9_lpc_class_init(ObjectClass *klass, void *data) >> adevc->ospm_status = ich9_pm_ospm_status; >> adevc->send_event = ich9_send_gpe; >> adevc->madt_cpu = pc_madt_cpu_entry; >> + adevc->build_mem_ranges = pc_build_mem_ranges; >> } >> >> static const TypeInfo ich9_lpc_info = { >> diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h >> index 43ff119179..5956b5ea33 100644 >> --- a/include/hw/acpi/acpi_dev_interface.h >> +++ b/include/hw/acpi/acpi_dev_interface.h >> @@ -39,6 +39,8 @@ void acpi_send_event(DeviceState *dev, AcpiEventStatusBits event); >> * for CPU indexed by @uid in @apic_ids array, >> * returned structure types are: >> * 0 - Local APIC, 9 - Local x2APIC, 0xB - GICC >> + * build_mem_ranges: build memory ranges of ACPI SRAT (except misc >> + * and hotplug SRAT ranges) and HMAT >> * >> * Interface is designed for providing unified interface >> * to generic ACPI functionality that could be used without >> @@ -54,5 +56,7 @@ typedef struct AcpiDeviceIfClass { >> void (*send_event)(AcpiDeviceIf *adev, AcpiEventStatusBits ev); >> void (*madt_cpu)(AcpiDeviceIf *adev, int uid, >> const CPUArchIdList *apic_ids, GArray *entry); >> + void (*build_mem_ranges)(AcpiDeviceIf *adev, MachineState *ms); >> + >> } AcpiDeviceIfClass; >> #endif >> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h >> index 5d5636241e..21b9ac3d11 100644 >> --- a/include/hw/i386/pc.h >> +++ b/include/hw/i386/pc.h >> @@ -281,6 +281,7 @@ void pc_system_firmware_init(PCMachineState *pcms, MemoryRegion *rom_memory); >> /* acpi-build.c */ >> void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, >> const CPUArchIdList *apic_ids, GArray *entry); >> +void pc_build_mem_ranges(AcpiDeviceIf *adev, MachineState *ms); >> >> /* e820 types */ >> #define E820_RAM 1 >> diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h >> index 437eb21fef..e3c85b77bc 100644 >> --- a/include/sysemu/numa.h >> +++ b/include/sysemu/numa.h >> @@ -20,6 +20,12 @@ struct NumaNodeMem { >> uint64_t node_plugged_mem; >> }; >> >> +typedef struct NumaMemRange { >> + uint64_t base; >> + uint64_t length; >> + uint32_t node; >> +} NumaMemRange; >> + >> struct NumaState { >> /* Number of NUMA nodes */ >> int num_nodes; >> @@ -29,6 +35,12 @@ struct NumaState { >> >> /* NUMA nodes information */ >> NodeInfo nodes[MAX_NODES]; >> + >> + /* Number of NUMA memory ranges */ >> + uint32_t mem_ranges_num; >> + >> + /* NUMA memory ranges */ >> + GArray *mem_ranges; >> }; >> typedef struct NumaState NumaState; >> >> diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs >> index 9c7393b08c..4f0cdc1a45 100644 >> --- a/stubs/Makefile.objs >> +++ b/stubs/Makefile.objs >> @@ -33,6 +33,7 @@ stub-obj-y += qmp_memory_device.o >> stub-obj-y += target-monitor-defs.o >> stub-obj-y += target-get-monitor-def.o >> stub-obj-y += pc_madt_cpu_entry.o >> +stub-obj-y += pc_build_mem_ranges.o >> stub-obj-y += vmgenid.o >> stub-obj-y += xen-common.o >> stub-obj-y += xen-hvm.o >> diff --git a/stubs/pc_build_mem_ranges.c b/stubs/pc_build_mem_ranges.c >> new file mode 100644 >> index 0000000000..997cdfe00b >> --- /dev/null >> +++ b/stubs/pc_build_mem_ranges.c >> @@ -0,0 +1,14 @@ >> +/* >> + * Stub for pc_build_mem_ranges(). >> + * piix4 is used not only pc, but also mips and etc. In order to add >> + * build_mem_ranges callback to AcpiDeviceIfClass and use pc_build_mem_ranges >> + * in hw/acpi/piix4.c, pc_build_mem_ranges() stub is added to make other arch >> + * can compile successfully. >> + */ >> + >> +#include "qemu/osdep.h" >> +#include "hw/i386/pc.h" >> + >> +void pc_build_mem_ranges(AcpiDeviceIf *adev, MachineState *ms) >> +{ >> +} >
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index ec4e186cec..bc078c1ad7 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -702,6 +702,7 @@ static void piix4_pm_class_init(ObjectClass *klass, void *data) adevc->ospm_status = piix4_ospm_status; adevc->send_event = piix4_send_gpe; adevc->madt_cpu = pc_madt_cpu_entry; + adevc->build_mem_ranges = pc_build_mem_ranges; } static const TypeInfo piix4_pm_info = { diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 055e677c30..44dd447fa5 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2279,18 +2279,89 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog) #define HOLE_640K_START (640 * KiB) #define HOLE_640K_END (1 * MiB) +void pc_build_mem_ranges(AcpiDeviceIf *adev, MachineState *ms) +{ + uint64_t mem_len, mem_base, next_base; + int i; + PCMachineState *pcms = PC_MACHINE(ms); + NumaState *nstat = ms->numa_state; + NumaMemRange *mem_range; + nstat->mem_ranges_num = 0; + next_base = 0; + + /* + * the memory map is a bit tricky, it contains at least one hole + * from 640k-1M and possibly another one from 3.5G-4G. + */ + + for (i = 0; i < pcms->numa_nodes; ++i) { + mem_base = next_base; + mem_len = pcms->node_mem[i]; + next_base = mem_base + mem_len; + + /* Cut out the 640K hole */ + if (mem_base <= HOLE_640K_START && + next_base > HOLE_640K_START) { + mem_len -= next_base - HOLE_640K_START; + if (mem_len > 0) { + mem_range = acpi_data_push(nstat->mem_ranges, + sizeof *mem_range); + mem_range->base = mem_base; + mem_range->length = mem_len; + mem_range->node = i; + nstat->mem_ranges_num++; + } + + /* Check for the rare case: 640K < RAM < 1M */ + if (next_base <= HOLE_640K_END) { + next_base = HOLE_640K_END; + continue; + } + mem_base = HOLE_640K_END; + mem_len = next_base - HOLE_640K_END; + } + + /* Cut out the ACPI_PCI hole */ + if (mem_base <= pcms->below_4g_mem_size && + next_base > pcms->below_4g_mem_size) { + mem_len -= next_base - pcms->below_4g_mem_size; + if (mem_len > 0) { + mem_range = acpi_data_push(nstat->mem_ranges, + sizeof *mem_range); + mem_range->base = mem_base; + mem_range->length = mem_len; + mem_range->node = i; + nstat->mem_ranges_num++; + } + mem_base = 1ULL << 32; + mem_len = next_base - pcms->below_4g_mem_size; + next_base = mem_base + mem_len; + } + if (mem_len > 0) { + mem_range = acpi_data_push(nstat->mem_ranges, + sizeof *mem_range); + mem_range->base = mem_base; + mem_range->length = mem_len; + mem_range->node = i; + nstat->mem_ranges_num++; + } + } +} + static void build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) { AcpiSystemResourceAffinityTable *srat; AcpiSratMemoryAffinity *numamem; - int i; - int srat_start, numa_start, slots; - uint64_t mem_len, mem_base, next_base; + int i, srat_start, numa_start, slots; MachineClass *mc = MACHINE_GET_CLASS(machine); const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine); PCMachineState *pcms = PC_MACHINE(machine); + AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(pcms->acpi_dev); + AcpiDeviceIf *adev = ACPI_DEVICE_IF(pcms->acpi_dev); + NumaState *nstat = machine->numa_state; + NumaMemRange *mem_range; ram_addr_t hotplugabble_address_space_size = object_property_get_int(OBJECT(pcms), PC_MACHINE_DEVMEM_REGION_SIZE, NULL); @@ -2327,57 +2398,25 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) } } + if (pcms->numa_nodes && !nstat->mem_ranges_num) { + nstat->mem_ranges = g_array_new(false, true /* clear */, + sizeof *mem_range); + adevc->build_mem_ranges(adev, machine); + } - /* the memory map is a bit tricky, it contains at least one hole - * from 640k-1M and possibly another one from 3.5G-4G. - */ - next_base = 0; numa_start = table_data->len; - for (i = 1; i < pcms->numa_nodes + 1; ++i) { - mem_base = next_base; - mem_len = pcms->node_mem[i - 1]; - next_base = mem_base + mem_len; - - /* Cut out the 640K hole */ - if (mem_base <= HOLE_640K_START && - next_base > HOLE_640K_START) { - mem_len -= next_base - HOLE_640K_START; - if (mem_len > 0) { - numamem = acpi_data_push(table_data, sizeof *numamem); - build_srat_memory(numamem, mem_base, mem_len, i - 1, - MEM_AFFINITY_ENABLED); - } - - /* Check for the rare case: 640K < RAM < 1M */ - if (next_base <= HOLE_640K_END) { - next_base = HOLE_640K_END; - continue; - } - mem_base = HOLE_640K_END; - mem_len = next_base - HOLE_640K_END; - } - - /* Cut out the ACPI_PCI hole */ - if (mem_base <= pcms->below_4g_mem_size && - next_base > pcms->below_4g_mem_size) { - mem_len -= next_base - pcms->below_4g_mem_size; - if (mem_len > 0) { - numamem = acpi_data_push(table_data, sizeof *numamem); - build_srat_memory(numamem, mem_base, mem_len, i - 1, - MEM_AFFINITY_ENABLED); - } - mem_base = 1ULL << 32; - mem_len = next_base - pcms->below_4g_mem_size; - next_base = mem_base + mem_len; - } - - if (mem_len > 0) { + for (i = 0; i < nstat->mem_ranges_num; i++) { + mem_range = &g_array_index(nstat->mem_ranges, NumaMemRange, i); + if (mem_range->length > 0) { numamem = acpi_data_push(table_data, sizeof *numamem); - build_srat_memory(numamem, mem_base, mem_len, i - 1, + build_srat_memory(numamem, mem_range->base, + mem_range->length, + mem_range->node, MEM_AFFINITY_ENABLED); } } + slots = (table_data->len - numa_start) / sizeof *numamem; for (; slots < pcms->numa_nodes + 2; slots++) { numamem = acpi_data_push(table_data, sizeof *numamem); diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index 35d17246e9..20d919c63d 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -801,6 +801,7 @@ static void ich9_lpc_class_init(ObjectClass *klass, void *data) adevc->ospm_status = ich9_pm_ospm_status; adevc->send_event = ich9_send_gpe; adevc->madt_cpu = pc_madt_cpu_entry; + adevc->build_mem_ranges = pc_build_mem_ranges; } static const TypeInfo ich9_lpc_info = { diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h index 43ff119179..5956b5ea33 100644 --- a/include/hw/acpi/acpi_dev_interface.h +++ b/include/hw/acpi/acpi_dev_interface.h @@ -39,6 +39,8 @@ void acpi_send_event(DeviceState *dev, AcpiEventStatusBits event); * for CPU indexed by @uid in @apic_ids array, * returned structure types are: * 0 - Local APIC, 9 - Local x2APIC, 0xB - GICC + * build_mem_ranges: build memory ranges of ACPI SRAT (except misc + * and hotplug SRAT ranges) and HMAT * * Interface is designed for providing unified interface * to generic ACPI functionality that could be used without @@ -54,5 +56,7 @@ typedef struct AcpiDeviceIfClass { void (*send_event)(AcpiDeviceIf *adev, AcpiEventStatusBits ev); void (*madt_cpu)(AcpiDeviceIf *adev, int uid, const CPUArchIdList *apic_ids, GArray *entry); + void (*build_mem_ranges)(AcpiDeviceIf *adev, MachineState *ms); + } AcpiDeviceIfClass; #endif diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 5d5636241e..21b9ac3d11 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -281,6 +281,7 @@ void pc_system_firmware_init(PCMachineState *pcms, MemoryRegion *rom_memory); /* acpi-build.c */ void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, const CPUArchIdList *apic_ids, GArray *entry); +void pc_build_mem_ranges(AcpiDeviceIf *adev, MachineState *ms); /* e820 types */ #define E820_RAM 1 diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h index 437eb21fef..e3c85b77bc 100644 --- a/include/sysemu/numa.h +++ b/include/sysemu/numa.h @@ -20,6 +20,12 @@ struct NumaNodeMem { uint64_t node_plugged_mem; }; +typedef struct NumaMemRange { + uint64_t base; + uint64_t length; + uint32_t node; +} NumaMemRange; + struct NumaState { /* Number of NUMA nodes */ int num_nodes; @@ -29,6 +35,12 @@ struct NumaState { /* NUMA nodes information */ NodeInfo nodes[MAX_NODES]; + + /* Number of NUMA memory ranges */ + uint32_t mem_ranges_num; + + /* NUMA memory ranges */ + GArray *mem_ranges; }; typedef struct NumaState NumaState; diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs index 9c7393b08c..4f0cdc1a45 100644 --- a/stubs/Makefile.objs +++ b/stubs/Makefile.objs @@ -33,6 +33,7 @@ stub-obj-y += qmp_memory_device.o stub-obj-y += target-monitor-defs.o stub-obj-y += target-get-monitor-def.o stub-obj-y += pc_madt_cpu_entry.o +stub-obj-y += pc_build_mem_ranges.o stub-obj-y += vmgenid.o stub-obj-y += xen-common.o stub-obj-y += xen-hvm.o diff --git a/stubs/pc_build_mem_ranges.c b/stubs/pc_build_mem_ranges.c new file mode 100644 index 0000000000..997cdfe00b --- /dev/null +++ b/stubs/pc_build_mem_ranges.c @@ -0,0 +1,14 @@ +/* + * Stub for pc_build_mem_ranges(). + * piix4 is used not only pc, but also mips and etc. In order to add + * build_mem_ranges callback to AcpiDeviceIfClass and use pc_build_mem_ranges + * in hw/acpi/piix4.c, pc_build_mem_ranges() stub is added to make other arch + * can compile successfully. + */ + +#include "qemu/osdep.h" +#include "hw/i386/pc.h" + +void pc_build_mem_ranges(AcpiDeviceIf *adev, MachineState *ms) +{ +}