@@ -67,3 +67,4 @@ CONFIG_I2C=y
CONFIG_SEV=$(CONFIG_KVM)
CONFIG_VTD=y
CONFIG_AMD_IOMMU=y
+CONFIG_ACPI_HMAT=y
@@ -6,6 +6,7 @@ common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o
common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o
common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o
+common-obj-$(CONFIG_ACPI_HMAT) += hmat.o
common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o
common-obj-y += acpi_interface.o
new file mode 100644
@@ -0,0 +1,139 @@
+/*
+ * HMAT ACPI Implementation
+ *
+ * Copyright(C) 2018 Intel Corporation.
+ *
+ * Author:
+ * Liu jingqi <jingqi.liu@linux.intel.com>
+ *
+ * HMAT is defined in ACPI 6.2.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#include "unistd.h"
+#include "fcntl.h"
+#include "qemu/osdep.h"
+#include "sysemu/numa.h"
+#include "hw/i386/pc.h"
+#include "hw/i386/acpi-build.h"
+#include "hw/acpi/acpi.h"
+#include "hw/acpi/hmat.h"
+#include "hw/acpi/aml-build.h"
+#include "hw/nvram/fw_cfg.h"
+#include "hw/acpi/bios-linker-loader.h"
+
+/*
+ * Append one HMAT Memory Subsystem Address Range Structure to @table_data.
+ *
+ * @base and @length describe the system physical address range and @node
+ * is the NUMA node it belongs to; @node is written as both the processor
+ * and the memory proximity domain.  The flags mark the processor domain
+ * valid when numa_info[] records the node as an initiator, and the memory
+ * domain valid when it records the node as a target.
+ */
+static void hmat_build_spa_info(GArray *table_data,
+                                uint64_t base, uint64_t length, int node)
+{
+    const uint16_t flags =
+        (numa_info[node].is_initiator ? HMAT_SPA_PROC_VALID : 0) |
+        (numa_info[node].is_target ? HMAT_SPA_MEM_VALID : 0);
+    /* Fields in the order they are emitted; sizes are in bytes. */
+    const struct {
+        uint64_t value;
+        int size;
+    } fields[] = {
+        { ACPI_HMAT_SPA, sizeof(uint16_t) },            /* Type */
+        { 0, sizeof(uint16_t) },                        /* Reserved0 */
+        { sizeof(AcpiHmatSpaRange), sizeof(uint32_t) }, /* Length */
+        { flags, sizeof(uint16_t) },                    /* Flags */
+        { 0, sizeof(uint16_t) },                        /* Reserved1 */
+        { node, sizeof(uint32_t) },     /* Process Proximity Domain */
+        { node, sizeof(uint32_t) },     /* Memory Proximity Domain */
+        { 0, sizeof(uint32_t) },                        /* Reserved2 */
+        { base, sizeof(uint64_t) },     /* SPA Range Base */
+        { length, sizeof(uint64_t) },   /* SPA Range Length */
+    };
+    size_t k;
+
+    for (k = 0; k < sizeof(fields) / sizeof(fields[0]); k++) {
+        build_append_int_noprefix(table_data, fields[k].value,
+                                  fields[k].size);
+    }
+}
+
+/*
+ * object_child_foreach() callback that collects PC-DIMM devices.
+ *
+ * @opaque points to the head of a GSList; @obj is appended to that list
+ * when it is a TYPE_PC_DIMM instance.  The children of @obj are visited
+ * with the same callback, so a whole subtree is covered.
+ *
+ * Always returns 0.
+ */
+static int pc_dimm_device_list(Object *obj, void *opaque)
+{
+    GSList **dimms = opaque;
+    const bool is_dimm = object_dynamic_cast(obj, TYPE_PC_DIMM) != NULL;
+
+    if (is_dimm) {
+        *dimms = g_slist_append(*dimms, DEVICE(obj));
+    }
+
+    /* Descend regardless of whether @obj itself matched. */
+    object_child_foreach(obj, pc_dimm_device_list, opaque);
+
+    return 0;
+}
+
+/*
+ * Append the Memory Subsystem Address Range Structures for all RAM of
+ * the machine to @table_data: first the ranges in mem_ranges[], then one
+ * structure per PC-DIMM device found below the machine object.
+ *
+ * The Proximity Domain of System Physical Address ranges defined
+ * in the HMAT, NFIT and SRAT tables shall match each other.  To stay in
+ * step with build_srat(), which only emits mem_ranges[] entries with a
+ * non-zero length, empty entries are skipped here as well.
+ */
+static void hmat_build_spa(GArray *table_data, PCMachineState *pcms)
+{
+    GSList *device_list = NULL;
+    GSList *item;
+    uint64_t mem_base, mem_len;
+    uint32_t i;
+    int node;
+
+    /* mem_ranges[] is shared with build_srat(); fill it only once. */
+    if (pcms->numa_nodes && !mem_ranges_number) {
+        build_mem_ranges(pcms);
+    }
+
+    for (i = 0; i < mem_ranges_number; i++) {
+        if (mem_ranges[i].length == 0) {
+            continue;
+        }
+        hmat_build_spa_info(table_data, mem_ranges[i].base,
+                            mem_ranges[i].length, mem_ranges[i].node);
+    }
+
+    /* Build HMAT SPA structures for PC-DIMM devices. */
+    object_child_foreach(qdev_get_machine(), pc_dimm_device_list, &device_list);
+
+    /*
+     * Walk the list with a separate cursor so that the head stays
+     * available for g_slist_free() below.
+     */
+    for (item = device_list; item; item = item->next) {
+        PCDIMMDevice *dimm = item->data;
+
+        mem_base = object_property_get_uint(OBJECT(dimm), PC_DIMM_ADDR_PROP,
+                                            NULL);
+        mem_len = object_property_get_uint(OBJECT(dimm), PC_DIMM_SIZE_PROP,
+                                           NULL);
+        node = object_property_get_uint(OBJECT(dimm), PC_DIMM_NODE_PROP,
+                                        NULL);
+        hmat_build_spa_info(table_data, mem_base, mem_len, node);
+    }
+
+    /* Only the list cells are released; the devices are not owned here. */
+    g_slist_free(device_list);
+}
+
+/*
+ * Emit the HMAT sub-structures into @hma.  The only kind generated so
+ * far is the Memory Subsystem Address Range structure.
+ */
+static void hmat_build_hma(GArray *hma, PCMachineState *pcms)
+{
+    hmat_build_spa(hma, pcms);
+}
+
+/*
+ * Append a complete HMAT to @table_data.
+ *
+ * Space for the fixed AcpiHmat part is reserved first, the sub-structures
+ * are appended behind it, and finally build_header() is called with the
+ * "HMAT" signature, the total table length and revision 1.
+ */
+void hmat_build_acpi(GArray *table_data, BIOSLinker *linker,
+                     MachineState *machine)
+{
+    PCMachineState *pcms = PC_MACHINE(machine);
+    const uint64_t table_offset = table_data->len;
+
+    acpi_data_push(table_data, sizeof(AcpiHmat));
+
+    hmat_build_hma(table_data, pcms);
+
+    /*
+     * Address the header through the current data pointer plus the saved
+     * offset rather than through a pointer taken before the table grew.
+     */
+    build_header(linker, table_data,
+                 (void *)(table_data->data + table_offset),
+                 "HMAT", table_data->len - table_offset, 1, NULL, NULL);
+}
new file mode 100644
@@ -0,0 +1,73 @@
+/*
+ * HMAT ACPI Implementation Header
+ *
+ * Copyright(C) 2018 Intel Corporation.
+ *
+ * Author:
+ * Liu jingqi <jingqi.liu@linux.intel.com>
+ *
+ * HMAT is defined in ACPI 6.2.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#ifndef HMAT_H
+#define HMAT_H
+
+#include "qemu/osdep.h"
+#include "hw/acpi/acpi-defs.h"
+#include "hw/acpi/acpi.h"
+#include "hw/acpi/bios-linker-loader.h"
+#include "hw/acpi/aml-build.h"
+
+/* Sub-structure type value of a Memory Subsystem Address Range Structure */
+#define ACPI_HMAT_SPA 0
+
+/*
+ * ACPI HMAT sub-structure header: the fields every sub-structure starts
+ * with (type, a reserved word and the length of the structure).
+ * The macro expands to struct member declarations and already ends with
+ * a ';', so it is used without a trailing semicolon.
+ */
+#define ACPI_HMAT_SUB_HEADER_DEF \
+ uint16_t type; \
+ uint16_t reserved0; \
+ uint32_t length;
+
+/*
+ * Bit values of the AcpiHmatSpaRange flags field.
+ */
+enum {
+ HMAT_SPA_PROC_VALID = 0x1, /* set when the node is an initiator */
+ HMAT_SPA_MEM_VALID = 0x2, /* set when the node is a target */
+ HMAT_SPA_RESERVATION_HINT = 0x4, /* defined only; not set by hmat.c */
+};
+
+/*
+ * HMAT (Heterogeneous Memory Attributes Table)
+ *
+ * Fixed leading part of the table: the fields from ACPI_TABLE_HEADER_DEF
+ * followed by a reserved dword.  hmat_build_acpi() reserves sizeof(AcpiHmat)
+ * bytes for it; the sub-structures are appended directly behind it.
+ */
+struct AcpiHmat {
+ ACPI_TABLE_HEADER_DEF
+ uint32_t reserved;
+} QEMU_PACKED;
+typedef struct AcpiHmat AcpiHmat;
+
+/*
+ * Memory Subsystem Address Range Structure.
+ *
+ * The table builder does not fill this struct directly; it emits the same
+ * fields one by one and uses sizeof(AcpiHmatSpaRange) as the value of the
+ * length field, so the layout here must stay in step with that code.
+ */
+struct AcpiHmatSpaRange {
+ ACPI_HMAT_SUB_HEADER_DEF
+ uint16_t flags; /* HMAT_SPA_* bits */
+ uint16_t reserved1;
+ uint32_t proc_proximity; /* processor proximity domain */
+ uint32_t mem_proximity; /* memory proximity domain */
+ uint32_t reserved2;
+ uint64_t spa_base; /* system physical address range base */
+ uint64_t spa_length; /* system physical address range length */
+} QEMU_PACKED;
+typedef struct AcpiHmatSpaRange AcpiHmatSpaRange;
+
+/*
+ * Append an HMAT to @table_data.
+ *
+ * @table_data: blob the table is appended to
+ * @linker: passed on to build_header() for the table header
+ * @machine: the machine; it is cast with PC_MACHINE() by the implementation
+ */
+void hmat_build_acpi(GArray *table_data, BIOSLinker *linker,
+ MachineState *machine);
+
+#endif
@@ -64,6 +64,7 @@
#include "hw/i386/intel_iommu.h"
#include "hw/acpi/ipmi.h"
+#include "hw/acpi/hmat.h"
/* These are used to size the ACPI tables for -M pc-i440fx-1.7 and
* -M pc-i440fx-2.0. Even if the actual amount of AML generated grows
@@ -119,6 +120,14 @@ typedef struct AcpiBuildPciBusHotplugState {
bool pcihp_bridge_en;
} AcpiBuildPciBusHotplugState;
+/*
+ * RAM ranges filled in by build_mem_ranges(); each entry is one
+ * contiguous piece of a NUMA node's memory.
+ * The memory map contains at least one hole from 640k-1M and possibly
+ * another one from 3.5G-4G.  Each hole can split the memory of one node
+ * into two pieces, so the number of memory ranges is at most 2 more than
+ * the number of NUMA nodes.
+ * mem_ranges_number is the number of valid entries in mem_ranges[].
+ */
+MemoryRange mem_ranges[MAX_NODES + 2];
+uint32_t mem_ranges_number;
+
+
static void init_common_fadt_data(Object *o, AcpiFadtData *data)
{
uint32_t io = object_property_get_uint(o, ACPI_PM_PROP_PM_IO_BASE, NULL);
@@ -2251,6 +2260,63 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog)
#define HOLE_640K_START (640 * KiB)
#define HOLE_640K_END (1 * MiB)
+/* Store one RAM range of @node in mem_ranges[]; empty ranges are dropped. */
+static void mem_ranges_add(uint64_t base, uint64_t length, uint32_t node)
+{
+    if (length == 0) {
+        return;
+    }
+
+    mem_ranges[mem_ranges_number].base = base;
+    mem_ranges[mem_ranges_number].length = length;
+    mem_ranges[mem_ranges_number].node = node;
+    mem_ranges_number++;
+}
+
+/*
+ * Rebuild mem_ranges[] from the per-node RAM sizes in @pcms.
+ *
+ * The nodes are laid out back to back starting at address 0.  The memory
+ * map is a bit tricky: it contains at least one hole from 640k-1M and
+ * possibly another one between below_4g_mem_size and 4G, so the memory
+ * of a node that crosses a hole is recorded as two ranges.
+ *
+ * Only non-empty ranges are stored.  The code this function was factored
+ * out of skipped zero-length ranges in all three places; keeping that
+ * here means consumers of mem_ranges[] never see an empty entry.
+ */
+void build_mem_ranges(PCMachineState *pcms)
+{
+    uint64_t mem_len, mem_base, next_base;
+    int i;
+
+    mem_ranges_number = 0;
+    next_base = 0;
+
+    for (i = 0; i < pcms->numa_nodes; ++i) {
+        mem_base = next_base;
+        mem_len = pcms->node_mem[i];
+        next_base = mem_base + mem_len;
+
+        /* Cut out the 640K hole */
+        if (mem_base <= HOLE_640K_START &&
+            next_base > HOLE_640K_START) {
+            /* Part of the node below the hole */
+            mem_len -= next_base - HOLE_640K_START;
+            mem_ranges_add(mem_base, mem_len, i);
+
+            /* Check for the rare case: 640K < RAM < 1M */
+            if (next_base <= HOLE_640K_END) {
+                next_base = HOLE_640K_END;
+                continue;
+            }
+            mem_base = HOLE_640K_END;
+            mem_len = next_base - HOLE_640K_END;
+        }
+
+        /* Cut out the ACPI_PCI hole */
+        if (mem_base <= pcms->below_4g_mem_size &&
+            next_base > pcms->below_4g_mem_size) {
+            /* Part of the node below the hole */
+            mem_len -= next_base - pcms->below_4g_mem_size;
+            mem_ranges_add(mem_base, mem_len, i);
+
+            /* The remainder continues at 4G */
+            mem_base = 1ULL << 32;
+            mem_len = next_base - pcms->below_4g_mem_size;
+            next_base = mem_base + mem_len;
+        }
+
+        mem_ranges_add(mem_base, mem_len, i);
+    }
+}
+
static void
build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
{
@@ -2259,7 +2325,6 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
int i;
int srat_start, numa_start, slots;
- uint64_t mem_len, mem_base, next_base;
MachineClass *mc = MACHINE_GET_CLASS(machine);
const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine);
PCMachineState *pcms = PC_MACHINE(machine);
@@ -2299,54 +2364,18 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
}
}
+ if (pcms->numa_nodes && !mem_ranges_number) {
+ build_mem_ranges(pcms);
+ }
- /* the memory map is a bit tricky, it contains at least one hole
- * from 640k-1M and possibly another one from 3.5G-4G.
- */
- next_base = 0;
numa_start = table_data->len;
- for (i = 1; i < pcms->numa_nodes + 1; ++i) {
- mem_base = next_base;
- mem_len = pcms->node_mem[i - 1];
- next_base = mem_base + mem_len;
-
- /* Cut out the 640K hole */
- if (mem_base <= HOLE_640K_START &&
- next_base > HOLE_640K_START) {
- mem_len -= next_base - HOLE_640K_START;
- if (mem_len > 0) {
- numamem = acpi_data_push(table_data, sizeof *numamem);
- build_srat_memory(numamem, mem_base, mem_len, i - 1,
- MEM_AFFINITY_ENABLED);
- }
-
- /* Check for the rare case: 640K < RAM < 1M */
- if (next_base <= HOLE_640K_END) {
- next_base = HOLE_640K_END;
- continue;
- }
- mem_base = HOLE_640K_END;
- mem_len = next_base - HOLE_640K_END;
- }
-
- /* Cut out the ACPI_PCI hole */
- if (mem_base <= pcms->below_4g_mem_size &&
- next_base > pcms->below_4g_mem_size) {
- mem_len -= next_base - pcms->below_4g_mem_size;
- if (mem_len > 0) {
- numamem = acpi_data_push(table_data, sizeof *numamem);
- build_srat_memory(numamem, mem_base, mem_len, i - 1,
- MEM_AFFINITY_ENABLED);
- }
- mem_base = 1ULL << 32;
- mem_len = next_base - pcms->below_4g_mem_size;
- next_base = mem_base + mem_len;
- }
-
- if (mem_len > 0) {
+ for (i = 0; i < mem_ranges_number; i++) {
+ if (mem_ranges[i].length > 0) {
numamem = acpi_data_push(table_data, sizeof *numamem);
- build_srat_memory(numamem, mem_base, mem_len, i - 1,
+ build_srat_memory(numamem, mem_ranges[i].base,
+ mem_ranges[i].length,
+ mem_ranges[i].node,
MEM_AFFINITY_ENABLED);
}
}
@@ -2669,6 +2698,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
acpi_add_table(table_offsets, tables_blob);
build_slit(tables_blob, tables->linker);
}
+ acpi_add_table(table_offsets, tables_blob);
+ hmat_build_acpi(tables_blob, tables->linker, machine);
}
if (acpi_get_mcfg(&mcfg)) {
acpi_add_table(table_offsets, tables_blob);
@@ -2,6 +2,16 @@
#ifndef HW_I386_ACPI_BUILD_H
#define HW_I386_ACPI_BUILD_H
+/* One contiguous range of guest RAM and the NUMA node it belongs to */
+typedef struct memory_range {
+ uint64_t base; /* start address of the range */
+ uint64_t length; /* size of the range in bytes */
+ uint32_t node; /* index of the NUMA node owning the range */
+} MemoryRange;
+
+/*
+ * Table of RAM ranges and the number of valid entries in it; both are
+ * defined in acpi-build.c and filled in by build_mem_ranges().
+ */
+extern MemoryRange mem_ranges[];
+extern uint32_t mem_ranges_number;
+
+/* Recompute mem_ranges[] and mem_ranges_number from the NUMA setup of @pcms */
+void build_mem_ranges(PCMachineState *pcms);
void acpi_setup(void);
#endif
@@ -13,6 +13,8 @@ struct NodeInfo {
uint64_t node_mem;
struct HostMemoryBackend *node_memdev;
bool present;
+ bool is_initiator;
+ bool is_target;
uint8_t distance[MAX_NODES];
};
@@ -105,6 +105,10 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node,
}
}
+ if (node->cpus) {
+ numa_info[nodenr].is_initiator = true;
+ }
+
if (node->has_mem && node->has_memdev) {
error_setg(errp, "cannot specify both mem= and memdev=");
return;
@@ -121,6 +125,7 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node,
if (node->has_mem) {
numa_info[nodenr].node_mem = node->mem;
+ numa_info[nodenr].is_target = true;
}
if (node->has_memdev) {
Object *o;
@@ -133,6 +138,7 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node,
object_ref(o);
numa_info[nodenr].node_mem = object_property_get_uint(o, "size", NULL);
numa_info[nodenr].node_memdev = MEMORY_BACKEND(o);
+ numa_info[nodenr].is_target = true;
}
numa_info[nodenr].present = true;
max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1);